Message ID | 20191031093909.9228-2-dja@axtens.net (mailing list archive) |
---|---|
State | New, archived |
Series | kasan: support backing vmalloc space with real shadow memory |
On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote:
> 	/*
> 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
> 	 * flag. It means that vm_struct is not fully initialized.
> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>
> 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
> 					pcpu_get_vm_areas);
> +
> +		/* assume success here */
> +		kasan_populate_vmalloc(sizes[area], vms[area]);
> 	}
> 	spin_unlock(&vmap_area_lock);

Here it is all wrong. GFP_KERNEL with in_atomic().

[ 32.231000][ T1] BUG: sleeping function called from invalid context at mm/page_alloc.c:4681
[ 32.239934][ T1] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
[ 32.248896][ T1] 2 locks held by swapper/0/1:
[ 32.253580][ T1] #0: ffffffff880d6160 (pcpu_alloc_mutex){+.+.}, at: pcpu_alloc+0x707/0xbe0
[ 32.262305][ T1] #1: ffffffff88105558 (vmap_area_lock){+.+.}, at: pcpu_get_vm_areas+0xc4f/0x1e60
[ 32.271919][ T1] CPU: 4 PID: 1 Comm: swapper/0 Tainted: G W 5.4.0-rc7-next-20191115+ #6
[ 32.281555][ T1] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 03/09/2018
[ 32.281896][ T1] Call Trace:
[ 32.281896][ T1]  dump_stack+0xa0/0xea
[ 32.281896][ T1]  ___might_sleep.cold.89+0xd2/0x122
[ 32.301996][ T1]  __might_sleep+0x73/0xe0
[ 32.301996][ T1]  __alloc_pages_nodemask+0x442/0x720
[ 32.311564][ T1]  ? __kasan_check_read+0x11/0x20
[ 32.311564][ T1]  ? __alloc_pages_slowpath+0x1870/0x1870
[ 32.321705][ T1]  ? mark_held_locks+0x86/0xb0
[ 32.321705][ T1]  ? _raw_spin_unlock_irqrestore+0x44/0x50
[ 32.331563][ T1]  alloc_page_interleave+0x18/0x130
[ 32.331563][ T1]  alloc_pages_current+0xf6/0x110
[ 32.341979][ T1]  __get_free_pages+0x12/0x60
[ 32.341979][ T1]  __pte_alloc_kernel+0x1b/0xc0
[ 32.351563][ T1]  apply_to_page_range+0x5b5/0x690
[ 32.351563][ T1]  ? memset+0x40/0x40
[ 32.361693][ T1]  kasan_populate_vmalloc+0x6d/0xa0
[ 32.361693][ T1]  pcpu_get_vm_areas+0xd49/0x1e60
[ 32.371425][ T1]  ? vm_map_ram+0x10d0/0x10d0
[ 32.371425][ T1]  ? pcpu_mem_zalloc+0x65/0x90
[ 32.371425][ T1]  pcpu_create_chunk+0x152/0x3f0
[ 32.371425][ T1]  pcpu_alloc+0xa2f/0xbe0
[ 32.391423][ T1]  ? pcpu_balance_workfn+0xb00/0xb00
[ 32.391423][ T1]  ? __kasan_kmalloc.constprop.11+0xc1/0xd0
[ 32.391423][ T1]  ? kasan_kmalloc+0x9/0x10
[ 32.391423][ T1]  ? kmem_cache_alloc_trace+0x1f8/0x470
[ 32.411421][ T1]  ? iommu_dma_get_resv_regions+0x10/0x10
[ 32.411421][ T1]  __alloc_percpu+0x15/0x20
[ 32.411421][ T1]  init_iova_flush_queue+0x79/0x230
[ 32.411421][ T1]  iommu_setup_dma_ops+0x87d/0x890
[ 32.431420][ T1]  ? __kasan_check_write+0x14/0x20
[ 32.431420][ T1]  ? refcount_sub_and_test_checked+0xba/0x170
[ 32.431420][ T1]  ? __kasan_check_write+0x14/0x20
[ 32.431420][ T1]  ? iommu_dma_alloc+0x1e0/0x1e0
[ 32.451420][ T1]  ? iommu_group_get_for_dev+0x153/0x450
[ 32.451420][ T1]  ? refcount_dec_and_test_checked+0x11/0x20
[ 32.451420][ T1]  ? kobject_put+0x36/0x270
[ 32.451420][ T1]  amd_iommu_add_device+0x560/0x710
[ 32.471423][ T1]  ? iommu_probe_device+0x150/0x150
[ 32.471423][ T1]  iommu_probe_device+0x8c/0x150
[ 32.471423][ T1]  add_iommu_group+0xe/0x20
[ 32.471423][ T1]  bus_for_each_dev+0xfe/0x160
[ 32.491421][ T1]  ? subsys_dev_iter_init+0x80/0x80
[ 32.491421][ T1]  ? blocking_notifier_chain_register+0x4f/0x70
[ 32.491421][ T1]  bus_set_iommu+0xc6/0x100
[ 32.491421][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.511571][ T1]  amd_iommu_init_api+0x25/0x3e
[ 32.511571][ T1]  state_next+0x214/0x7ea
[ 32.511571][ T1]  ? check_flags.part.25+0x86/0x220
[ 32.511571][ T1]  ? early_amd_iommu_init+0x10c0/0x10c0
[ 32.531421][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.531421][ T1]  ? rcu_read_lock_sched_held+0xac/0xe0
[ 32.531421][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.551423][ T1]  amd_iommu_init+0x25/0x57
[ 32.551423][ T1]  pci_iommu_init+0x26/0x62
[ 32.551423][ T1]  do_one_initcall+0xfe/0x4fa
[ 32.551423][ T1]  ? perf_trace_initcall_level+0x240/0x240
[ 32.571420][ T1]  ? rcu_read_lock_sched_held+0xac/0xe0
[ 32.571420][ T1]  ? rcu_read_lock_bh_held+0xc0/0xc0
[ 32.571420][ T1]  ? __kasan_check_read+0x11/0x20
[ 32.571420][ T1]  kernel_init_freeable+0x420/0x4e4
[ 32.591420][ T1]  ? start_kernel+0x6a9/0x6a9
[ 32.591420][ T1]  ? lockdep_hardirqs_on+0x1b0/0x2a0
[ 32.591420][ T1]  ? _raw_spin_unlock_irq+0x27/0x40
[ 32.591420][ T1]  ? rest_init+0x307/0x307
[ 32.611557][ T1]  kernel_init+0x11/0x139
[ 32.611557][ T1]  ? rest_init+0x307/0x307
[ 32.611557][ T1]  ret_from_fork+0x27/0x50

[ 32.054647][ T1] BUG: sleeping function called from invalid context at mm/page_alloc.c:4681
[ 32.063814][ T1] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
[ 32.072444][ T1] 2 locks held by swapper/0/1:
[ 32.077104][ T1] #0: ffffffffac0d6160 (pcpu_alloc_mutex){+.+.}, at: pcpu_alloc+0x707/0xbe0
[ 32.086227][ T1] #1: ffffffffac105558 (vmap_area_lock){+.+.}, at: pcpu_get_vm_areas+0xc4f/0x1e50
[ 32.095478][ T1] CPU: 53 PID: 1 Comm: swapper/0 Tainted: G W 5.4.0-rc7-next-20191115 #5
[ 32.105115][ T1] Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 03/09/2018
[ 32.105450][ T1] Call Trace:
[ 32.105450][ T1]  dump_stack+0xa0/0xea
[ 32.105450][ T1]  ___might_sleep.cold.89+0xd2/0x122
[ 32.105450][ T1]  __might_sleep+0x73/0xe0
[ 32.105450][ T1]  __alloc_pages_nodemask+0x442/0x720
[ 32.105450][ T1]  ? add_iommu_group+0xe/0x20
[ 32.105450][ T1]  ? bus_for_each_dev+0xfe/0x160
[ 32.105450][ T1]  ? __alloc_pages_slowpath+0x1870/0x1870
[ 32.105450][ T1]  ? check_chain_key+0x1df/0x2e0
[ 32.105450][ T1]  alloc_page_interleave+0x18/0x130
[ 32.105450][ T1]  alloc_pages_current+0xf6/0x110
[ 32.105450][ T1]  __get_free_pages+0x12/0x60
[ 32.105450][ T1]  kasan_populate_vmalloc_pte+0x2a/0x150
[ 32.105450][ T1]  ? register_lock_class+0x940/0x940
[ 32.105450][ T1]  apply_to_page_range+0x42d/0x690
[ 32.105450][ T1]  ? memset+0x40/0x40
[ 32.105450][ T1]  kasan_populate_vmalloc+0x69/0xa0
[ 32.105450][ T1]  pcpu_get_vm_areas+0xd44/0x1e50
[ 32.105450][ T1]  ? vm_map_ram+0x10d0/0x10d0
[ 32.105450][ T1]  ? pcpu_mem_zalloc+0x65/0x90
[ 32.105450][ T1]  pcpu_create_chunk+0x152/0x3f0
[ 32.105450][ T1]  pcpu_alloc+0xa2f/0xbe0
[ 32.105450][ T1]  ? pcpu_balance_workfn+0xb00/0xb00
[ 32.105450][ T1]  ? __kasan_kmalloc.constprop.11+0xc1/0xd0
[ 32.105450][ T1]  ? kasan_kmalloc+0x9/0x10
[ 32.105450][ T1]  ? kmem_cache_alloc_trace+0x1f8/0x470
[ 32.105450][ T1]  ? iommu_dma_get_resv_regions+0x10/0x10
[ 32.105450][ T1]  __alloc_percpu+0x15/0x20
[ 32.105450][ T1]  init_iova_flush_queue+0x79/0x230
[ 32.105450][ T1]  iommu_setup_dma_ops+0x87d/0x890
[ 32.105450][ T1]  ? __kasan_check_write+0x14/0x20
[ 32.105450][ T1]  ? refcount_sub_and_test_checked+0xba/0x170
[ 32.105450][ T1]  ? __kasan_check_write+0x14/0x20
[ 32.105450][ T1]  ? iommu_dma_alloc+0x1e0/0x1e0
[ 32.105450][ T1]  ? iommu_group_get_for_dev+0x153/0x450
[ 32.105450][ T1]  ? refcount_dec_and_test_checked+0x11/0x20
[ 32.105450][ T1]  ? kobject_put+0x36/0x270
[ 32.105450][ T1]  amd_iommu_add_device+0x560/0x710
[ 32.105450][ T1]  ? iommu_probe_device+0x150/0x150
[ 32.105450][ T1]  iommu_probe_device+0x8c/0x150
[ 32.105450][ T1]  add_iommu_group+0xe/0x20
[ 32.105450][ T1]  bus_for_each_dev+0xfe/0x160
[ 32.105450][ T1]  ? subsys_dev_iter_init+0x80/0x80
[ 32.105450][ T1]  ? blocking_notifier_chain_register+0x4f/0x70
[ 32.105450][ T1]  bus_set_iommu+0xc6/0x100
[ 32.105450][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.105450][ T1]  amd_iommu_init_api+0x25/0x3e
[ 32.105450][ T1]  state_next+0x214/0x7ea
[ 32.105450][ T1]  ? check_flags.part.25+0x86/0x220
[ 32.105450][ T1]  ? early_amd_iommu_init+0x10c0/0x10c0
[ 32.105450][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.105450][ T1]  ? rcu_read_lock_sched_held+0xac/0xe0
[ 32.105450][ T1]  ? e820__memblock_setup+0x10e/0x10e
[ 32.105450][ T1]  amd_iommu_init+0x25/0x57
[ 32.105450][ T1]  pci_iommu_init+0x26/0x62
[ 32.105450][ T1]  do_one_initcall+0xfe/0x4fa
[ 32.105450][ T1] [...]
[ 32.781281][ T1] pci 0000:60:08.0: Adding to iommu group 63
[ 32.831700][ T1] pci 0000:60:08.1: Adding to iommu group 64
[ 32.883138][ T1] pci 0000:63:00.0: Adding to iommu group 65
[ 32.933084][ T1] pci 0000:63:00.1: Adding to iommu group 65
[ 32.940474][ T1] pci 0000:62:00.0: Adding to iommu group 66
[ 32.991631][ T1] pci 0000:62:00.2: Adding to iommu group 67
[ 33.042553][ T1] pci 0000:61:00.0: Adding to iommu group 68
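To make the report concrete, this is the shape of the problem in the quoted hunk: the shadow-population call, which allocates memory with GFP_KERNEL, runs inside the vmap_area_lock critical section (a minimal reconstruction of the hunk above, not verbatim kernel source):

spin_lock(&vmap_area_lock);
for (area = 0; area < nr_vms; area++) {
	setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
				pcpu_get_vm_areas);
	/* assume success here */
	kasan_populate_vmalloc(sizes[area], vms[area]); /* allocates with GFP_KERNEL: may sleep */
}
spin_unlock(&vmap_area_lock);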
Qian Cai <cai@lca.pw> writes:

> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote:
>> 	/*
>> 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
>> 	 * flag. It means that vm_struct is not fully initialized.
>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
>>
>> 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
>> 					pcpu_get_vm_areas);
>> +
>> +		/* assume success here */
>> +		kasan_populate_vmalloc(sizes[area], vms[area]);
>> 	}
>> 	spin_unlock(&vmap_area_lock);
>
> Here it is all wrong. GFP_KERNEL with in_atomic().

I think this fix will work; I will do a v12 with it included.

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a4b950a02d0b..bf030516258c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 
 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
 					pcpu_get_vm_areas);
+	}
+	spin_unlock(&vmap_area_lock);
 
+	/* populate the shadow space outside of the lock */
+	for (area = 0; area < nr_vms; area++) {
 		/* assume success here */
 		kasan_populate_vmalloc(sizes[area], vms[area]);
 	}
-	spin_unlock(&vmap_area_lock);
 
 	kfree(vas);
 	return vms;
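For context on why kasan_populate_vmalloc() can sleep: the shadow pages are allocated on demand in the PTE callback that apply_to_page_range() invokes. Going by the traces above, the callback looks roughly like this (a simplified sketch of the -next mm/kasan/common.c code, not the verbatim source):

static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
				      void *unused)
{
	unsigned long page;
	pte_t pte;

	if (likely(!pte_none(*ptep)))
		return 0;

	/* GFP_KERNEL may sleep, hence the splat under vmap_area_lock */
	page = __get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	/* fresh shadow starts out poisoned until the area is unpoisoned */
	memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
	pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

	spin_lock(&init_mm.page_table_lock);
	if (likely(pte_none(*ptep))) {
		set_pte_at(&init_mm, addr, ptep, pte);
		page = 0;
	}
	spin_unlock(&init_mm.page_table_lock);
	if (page)
		free_page(page);	/* lost the race; drop our page */
	return 0;
}

Note that apply_to_page_range() can also allocate the intermediate page tables themselves (__pte_alloc_kernel() in the first trace), so the walk may sleep even before this callback runs; that is why the fix defers the whole call until after spin_unlock().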
On 11/18/19 6:29 AM, Daniel Axtens wrote: > Qian Cai <cai@lca.pw> writes: > >> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote: >>> /* >>> * In this function, newly allocated vm_struct has VM_UNINITIALIZED >>> * flag. It means that vm_struct is not fully initialized. >>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, >>> >>> setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, >>> pcpu_get_vm_areas); >>> + >>> + /* assume success here */ >>> + kasan_populate_vmalloc(sizes[area], vms[area]); >>> } >>> spin_unlock(&vmap_area_lock); >> >> Here it is all wrong. GFP_KERNEL with in_atomic(). > > I think this fix will work, I will do a v12 with it included. You can send just the fix. Andrew will fold it into the original patch before sending it to Linus. > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index a4b950a02d0b..bf030516258c 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, > > setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, > pcpu_get_vm_areas); > + } > + spin_unlock(&vmap_area_lock); > > + /* populate the shadow space outside of the lock */ > + for (area = 0; area < nr_vms; area++) { > /* assume success here */ > kasan_populate_vmalloc(sizes[area], vms[area]); > } > - spin_unlock(&vmap_area_lock); > > kfree(vas); > return vms; > >
On Tue, Nov 19, 2019 at 10:54 AM Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> On 11/18/19 6:29 AM, Daniel Axtens wrote:
> > Qian Cai <cai@lca.pw> writes:
> >
> >> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote:
> >>> 	/*
> >>> 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
> >>> 	 * flag. It means that vm_struct is not fully initialized.
> >>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> >>>
> >>> 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
> >>> 					pcpu_get_vm_areas);
> >>> +
> >>> +		/* assume success here */
> >>> +		kasan_populate_vmalloc(sizes[area], vms[area]);
> >>> 	}
> >>> 	spin_unlock(&vmap_area_lock);
> >>
> >> Here it is all wrong. GFP_KERNEL with in_atomic().
> >
> > I think this fix will work, I will do a v12 with it included.
>
> You can send just the fix. Andrew will fold it into the original patch before sending it to Linus.
>
>
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index a4b950a02d0b..bf030516258c 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
> >
> > 		setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC,
> > 					pcpu_get_vm_areas);
> > +	}
> > +	spin_unlock(&vmap_area_lock);
> >
> > +	/* populate the shadow space outside of the lock */
> > +	for (area = 0; area < nr_vms; area++) {
> > 		/* assume success here */
> > 		kasan_populate_vmalloc(sizes[area], vms[area]);
> > 	}
> > -	spin_unlock(&vmap_area_lock);
> >
> > 	kfree(vas);
> > 	return vms;

Hi,

I am testing this support on next-20191129 and seeing the following warnings:

BUG: sleeping function called from invalid context at mm/page_alloc.c:4681
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1
4 locks held by kworker/1:1/44:
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: __write_once_size include/linux/compiler.h:247 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set include/asm-generic/atomic-instrumented.h:868 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic_long_set include/asm-generic/atomic-long.h:40 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data kernel/workqueue.c:615 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline]
#0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: process_one_work+0x88b/0x1750 kernel/workqueue.c:2235
#1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239
#2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845
#3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline]
#3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431
Preemption disabled at:
[<ffffffff81a84199>] spin_lock include/linux/spinlock.h:338 [inline]
[<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431
CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014
Workqueue: events pcpu_balance_workfn
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x199/0x216 lib/dump_stack.c:118
 ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800
 __might_sleep+0x95/0x190 kernel/sched/core.c:6753
 prepare_alloc_pages mm/page_alloc.c:4681 [inline]
 __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730
 alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211
 alloc_pages include/linux/gfp.h:532 [inline]
 __get_free_pages+0xc/0x40 mm/page_alloc.c:4786
 kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline]
 kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753
 apply_to_pte_range mm/memory.c:2041 [inline]
 apply_to_pmd_range mm/memory.c:2068 [inline]
 apply_to_pud_range mm/memory.c:2088 [inline]
 apply_to_p4d_range mm/memory.c:2108 [inline]
 apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133
 kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791
 pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439
 pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340
 pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934
 process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264
 worker_thread+0x8b/0xd20 kernel/workqueue.c:2410
 kthread+0x365/0x450 kernel/kthread.c:255
 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352

Not sure if it's the same or not. Is it addressed by something in flight?

My config:
https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt
On Fri, Nov 29, 2019 at 11:43 AM Dmitry Vyukov <dvyukov@google.com> wrote: > > On Tue, Nov 19, 2019 at 10:54 AM Andrey Ryabinin > <aryabinin@virtuozzo.com> wrote: > > On 11/18/19 6:29 AM, Daniel Axtens wrote: > > > Qian Cai <cai@lca.pw> writes: > > > > > >> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote: > > >>> /* > > >>> * In this function, newly allocated vm_struct has VM_UNINITIALIZED > > >>> * flag. It means that vm_struct is not fully initialized. > > >>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, > > >>> > > >>> setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, > > >>> pcpu_get_vm_areas); > > >>> + > > >>> + /* assume success here */ > > >>> + kasan_populate_vmalloc(sizes[area], vms[area]); > > >>> } > > >>> spin_unlock(&vmap_area_lock); > > >> > > >> Here it is all wrong. GFP_KERNEL with in_atomic(). > > > > > > I think this fix will work, I will do a v12 with it included. > > > > You can send just the fix. Andrew will fold it into the original patch before sending it to Linus. > > > > > > > > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > > > index a4b950a02d0b..bf030516258c 100644 > > > --- a/mm/vmalloc.c > > > +++ b/mm/vmalloc.c > > > @@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, > > > > > > setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, > > > pcpu_get_vm_areas); > > > + } > > > + spin_unlock(&vmap_area_lock); > > > > > > + /* populate the shadow space outside of the lock */ > > > + for (area = 0; area < nr_vms; area++) { > > > /* assume success here */ > > > kasan_populate_vmalloc(sizes[area], vms[area]); > > > } > > > - spin_unlock(&vmap_area_lock); > > > > > > kfree(vas); > > > return vms; > > Hi, > > I am testing this support on next-20191129 and seeing the following warnings: > > BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1 > 4 locks held by kworker/1:1/44: > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > __write_once_size include/linux/compiler.h:247 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set > include/asm-generic/atomic-instrumented.h:868 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > atomic_long_set include/asm-generic/atomic-long.h:40 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data > kernel/workqueue.c:615 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline] > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > process_one_work+0x88b/0x1750 kernel/workqueue.c:2235 > #1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: > process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239 > #2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: > pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845 > #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock > include/linux/spinlock.h:338 [inline] > #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: > pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > Preemption disabled at: > [<ffffffff81a84199>] spin_lock include/linux/spinlock.h:338 [inline] > [<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5 > Hardware name: QEMU Standard PC 
(Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 > Workqueue: events pcpu_balance_workfn > Call Trace: > __dump_stack lib/dump_stack.c:77 [inline] > dump_stack+0x199/0x216 lib/dump_stack.c:118 > ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 > __might_sleep+0x95/0x190 kernel/sched/core.c:6753 > prepare_alloc_pages mm/page_alloc.c:4681 [inline] > __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730 > alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 > alloc_pages include/linux/gfp.h:532 [inline] > __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 > kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline] > kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753 > apply_to_pte_range mm/memory.c:2041 [inline] > apply_to_pmd_range mm/memory.c:2068 [inline] > apply_to_pud_range mm/memory.c:2088 [inline] > apply_to_p4d_range mm/memory.c:2108 [inline] > apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133 > kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791 > pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439 > pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340 > pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934 > process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264 > worker_thread+0x8b/0xd20 kernel/workqueue.c:2410 > kthread+0x365/0x450 kernel/kthread.c:255 > ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 > > > Not sure if it's the same or not. Is it addressed by something in flight? > > My config: > https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt I've tried this fix for pcpu_get_vm_areas: https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ and it helps. But this will break syzbot on linux-next soon.
On Fri, Nov 29, 2019 at 11:58 AM Dmitry Vyukov <dvyukov@google.com> wrote: > > On Fri, Nov 29, 2019 at 11:43 AM Dmitry Vyukov <dvyukov@google.com> wrote: > > > > On Tue, Nov 19, 2019 at 10:54 AM Andrey Ryabinin > > <aryabinin@virtuozzo.com> wrote: > > > On 11/18/19 6:29 AM, Daniel Axtens wrote: > > > > Qian Cai <cai@lca.pw> writes: > > > > > > > >> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote: > > > >>> /* > > > >>> * In this function, newly allocated vm_struct has VM_UNINITIALIZED > > > >>> * flag. It means that vm_struct is not fully initialized. > > > >>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, > > > >>> > > > >>> setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, > > > >>> pcpu_get_vm_areas); > > > >>> + > > > >>> + /* assume success here */ > > > >>> + kasan_populate_vmalloc(sizes[area], vms[area]); > > > >>> } > > > >>> spin_unlock(&vmap_area_lock); > > > >> > > > >> Here it is all wrong. GFP_KERNEL with in_atomic(). > > > > > > > > I think this fix will work, I will do a v12 with it included. > > > > > > You can send just the fix. Andrew will fold it into the original patch before sending it to Linus. > > > > > > > > > > > > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c > > > > index a4b950a02d0b..bf030516258c 100644 > > > > --- a/mm/vmalloc.c > > > > +++ b/mm/vmalloc.c > > > > @@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, > > > > > > > > setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, > > > > pcpu_get_vm_areas); > > > > + } > > > > + spin_unlock(&vmap_area_lock); > > > > > > > > + /* populate the shadow space outside of the lock */ > > > > + for (area = 0; area < nr_vms; area++) { > > > > /* assume success here */ > > > > kasan_populate_vmalloc(sizes[area], vms[area]); > > > > } > > > > - spin_unlock(&vmap_area_lock); > > > > > > > > kfree(vas); > > > > return vms; > > > > Hi, > > > > I am testing this support on next-20191129 and seeing the following warnings: > > > > BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 > > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1 > > 4 locks held by kworker/1:1/44: > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > > __write_once_size include/linux/compiler.h:247 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > > arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set > > include/asm-generic/atomic-instrumented.h:868 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > > atomic_long_set include/asm-generic/atomic-long.h:40 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data > > kernel/workqueue.c:615 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > > set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline] > > #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > > process_one_work+0x88b/0x1750 kernel/workqueue.c:2235 > > #1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: > > process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239 > > #2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: > > pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845 > > #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock > > include/linux/spinlock.h:338 [inline] > > #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: > > pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > > Preemption disabled at: > > 
[<ffffffff81a84199>] spin_lock include/linux/spinlock.h:338 [inline] > > [<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > > CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5 > > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 > > Workqueue: events pcpu_balance_workfn > > Call Trace: > > __dump_stack lib/dump_stack.c:77 [inline] > > dump_stack+0x199/0x216 lib/dump_stack.c:118 > > ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 > > __might_sleep+0x95/0x190 kernel/sched/core.c:6753 > > prepare_alloc_pages mm/page_alloc.c:4681 [inline] > > __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730 > > alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 > > alloc_pages include/linux/gfp.h:532 [inline] > > __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 > > kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline] > > kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753 > > apply_to_pte_range mm/memory.c:2041 [inline] > > apply_to_pmd_range mm/memory.c:2068 [inline] > > apply_to_pud_range mm/memory.c:2088 [inline] > > apply_to_p4d_range mm/memory.c:2108 [inline] > > apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133 > > kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791 > > pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439 > > pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340 > > pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934 > > process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264 > > worker_thread+0x8b/0xd20 kernel/workqueue.c:2410 > > kthread+0x365/0x450 kernel/kthread.c:255 > > ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 > > > > > > Not sure if it's the same or not. Is it addressed by something in flight? > > > > My config: > > https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt > > > I've tried this fix for pcpu_get_vm_areas: > https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ > and it helps. But this will break syzbot on linux-next soon. Can this be related as well? Crashes on accesses to shadow on the ion memory... 
BUG: unable to handle page fault for address: fffff52006000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 7ffcd067 P4D 7ffcd067 PUD 2cd10067 PMD 0
Oops: 0000 [#1] PREEMPT SMP KASAN
CPU: 2 PID: 3472 Comm: ion_system_heap Not tainted 5.4.0-next-20191129+ #6
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:memory_is_nonzero mm/kasan/generic.c:121 [inline]
RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:135 [inline]
RIP: 0010:memory_is_poisoned mm/kasan/generic.c:166 [inline]
RIP: 0010:check_memory_region_inline mm/kasan/generic.c:182 [inline]
RIP: 0010:check_memory_region+0x83/0x1d0 mm/kasan/generic.c:192
Code: 83 fb 10 0f 8e a9 00 00 00 45 89 c8 41 83 e0 07 75 66 4c 8d 43 07 48 85 db 4c 0f 49 c3 49 c1 f8 03 45 85 c0 0f 84 3f 01 00 00 <48> 83 38 00 75 1c 41 83 e8 01 4e 8d 44 c0 08 48 83 c0 08 49 39 c0
RSP: 0018:ffffc900011c7b10 EFLAGS: 00010206
RAX: fffff52006000000 RBX: 0000000000004000 RCX: ffffffff85988df8
RDX: 0000000000000001 RSI: 0000000000020000 RDI: ffffc90030000000
RBP: ffffc900011c7b28 R08: 0000000000000800 R09: fffff52006000000
R10: fffff52006003fff R11: ffffc9003001ffff R12: fffff52006004000
R13: 0000000000000000 R14: dffffc0000000000 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff52006000000 CR3: 00000000680fb004 CR4: 0000000000760ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 memset+0x23/0x40 mm/kasan/common.c:107
 memset include/linux/string.h:410 [inline]
 ion_heap_clear_pages+0x48/0x70 drivers/staging/android/ion/ion_heap.c:106
 ion_heap_sglist_zero+0x1f9/0x260 drivers/staging/android/ion/ion_heap.c:123
 ion_heap_buffer_zero+0xf8/0x150 drivers/staging/android/ion/ion_heap.c:145
 ion_system_heap_free+0x227/0x290 drivers/staging/android/ion/ion_system_heap.c:163
 ion_buffer_destroy+0x15a/0x2d0 drivers/staging/android/ion/ion.c:93
 ion_heap_deferred_free+0x267/0x5e0 drivers/staging/android/ion/ion_heap.c:239
 kthread+0x365/0x450 kernel/kthread.c:255
 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
Modules linked in:
Dumping ftrace buffer:
 (ftrace buffer empty)
CR2: fffff52006000000
---[ end trace c101f19526ce3d42 ]---
RIP: 0010:memory_is_nonzero mm/kasan/generic.c:121 [inline]
RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:135 [inline]
RIP: 0010:memory_is_poisoned mm/kasan/generic.c:166 [inline]
RIP: 0010:check_memory_region_inline mm/kasan/generic.c:182 [inline]
RIP: 0010:check_memory_region+0x83/0x1d0 mm/kasan/generic.c:192
Code: 83 fb 10 0f 8e a9 00 00 00 45 89 c8 41 83 e0 07 75 66 4c 8d 43 07 48 85 db 4c 0f 49 c3 49 c1 f8 03 45 85 c0 0f 84 3f 01 00 00 <48> 83 38 00 75 1c 41 83 e8 01 4e 8d 44 c0 08 48 83 c0 08 49 39 c0
RSP: 0018:ffffc900011c7b10 EFLAGS: 00010206
RAX: fffff52006000000 RBX: 0000000000004000 RCX: ffffffff85988df8
RDX: 0000000000000001 RSI: 0000000000020000 RDI: ffffc90030000000
RBP: ffffc900011c7b28 R08: 0000000000000800 R09: fffff52006000000
R10: fffff52006003fff R11: ffffc9003001ffff R12: fffff52006004000
R13: 0000000000000000 R14: dffffc0000000000 R15: 0000000000000000
FS: 0000000000000000(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff52006000000 CR3: 00000000680fb004 CR4: 0000000000760ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
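For what it's worth, the faulting address here is exactly the shadow address generic KASAN would compute for the buffer being memset: shadow(p) = (p >> 3) + KASAN_SHADOW_OFFSET, with the x86-64 offset 0xdffffc0000000000 (visible in R14 above). Taking the memset() target in RDI:

  0xffffc90030000000 >> 3                 == 0x1ffff92006000000
  0x1ffff92006000000 + 0xdffffc0000000000 == 0xfffff52006000000

which matches CR2: the instrumentation is reading shadow for a vmalloc-space address whose shadow was never populated.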
On 11/29/19 2:02 PM, Dmitry Vyukov wrote: > On Fri, Nov 29, 2019 at 11:58 AM Dmitry Vyukov <dvyukov@google.com> wrote: >> >> On Fri, Nov 29, 2019 at 11:43 AM Dmitry Vyukov <dvyukov@google.com> wrote: >>> >>> On Tue, Nov 19, 2019 at 10:54 AM Andrey Ryabinin >>> <aryabinin@virtuozzo.com> wrote: >>>> On 11/18/19 6:29 AM, Daniel Axtens wrote: >>>>> Qian Cai <cai@lca.pw> writes: >>>>> >>>>>> On Thu, 2019-10-31 at 20:39 +1100, Daniel Axtens wrote: >>>>>>> /* >>>>>>> * In this function, newly allocated vm_struct has VM_UNINITIALIZED >>>>>>> * flag. It means that vm_struct is not fully initialized. >>>>>>> @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, >>>>>>> >>>>>>> setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, >>>>>>> pcpu_get_vm_areas); >>>>>>> + >>>>>>> + /* assume success here */ >>>>>>> + kasan_populate_vmalloc(sizes[area], vms[area]); >>>>>>> } >>>>>>> spin_unlock(&vmap_area_lock); >>>>>> >>>>>> Here it is all wrong. GFP_KERNEL with in_atomic(). >>>>> >>>>> I think this fix will work, I will do a v12 with it included. >>>> >>>> You can send just the fix. Andrew will fold it into the original patch before sending it to Linus. >>>> >>>> >>>> >>>>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c >>>>> index a4b950a02d0b..bf030516258c 100644 >>>>> --- a/mm/vmalloc.c >>>>> +++ b/mm/vmalloc.c >>>>> @@ -3417,11 +3417,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, >>>>> >>>>> setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, >>>>> pcpu_get_vm_areas); >>>>> + } >>>>> + spin_unlock(&vmap_area_lock); >>>>> >>>>> + /* populate the shadow space outside of the lock */ >>>>> + for (area = 0; area < nr_vms; area++) { >>>>> /* assume success here */ >>>>> kasan_populate_vmalloc(sizes[area], vms[area]); >>>>> } >>>>> - spin_unlock(&vmap_area_lock); >>>>> >>>>> kfree(vas); >>>>> return vms; >>> >>> Hi, >>> >>> I am testing this support on next-20191129 and seeing the following warnings: >>> >>> BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 >>> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1 >>> 4 locks held by kworker/1:1/44: >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >>> __write_once_size include/linux/compiler.h:247 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >>> arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set >>> include/asm-generic/atomic-instrumented.h:868 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >>> atomic_long_set include/asm-generic/atomic-long.h:40 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data >>> kernel/workqueue.c:615 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >>> set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline] >>> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >>> process_one_work+0x88b/0x1750 kernel/workqueue.c:2235 >>> #1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: >>> process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239 >>> #2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: >>> pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845 >>> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock >>> include/linux/spinlock.h:338 [inline] >>> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: >>> pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 >>> Preemption disabled at: >>> [<ffffffff81a84199>] spin_lock 
include/linux/spinlock.h:338 [inline] >>> [<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 >>> CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5 >>> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 >>> Workqueue: events pcpu_balance_workfn >>> Call Trace: >>> __dump_stack lib/dump_stack.c:77 [inline] >>> dump_stack+0x199/0x216 lib/dump_stack.c:118 >>> ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 >>> __might_sleep+0x95/0x190 kernel/sched/core.c:6753 >>> prepare_alloc_pages mm/page_alloc.c:4681 [inline] >>> __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730 >>> alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 >>> alloc_pages include/linux/gfp.h:532 [inline] >>> __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 >>> kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline] >>> kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753 >>> apply_to_pte_range mm/memory.c:2041 [inline] >>> apply_to_pmd_range mm/memory.c:2068 [inline] >>> apply_to_pud_range mm/memory.c:2088 [inline] >>> apply_to_p4d_range mm/memory.c:2108 [inline] >>> apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133 >>> kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791 >>> pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439 >>> pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340 >>> pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934 >>> process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264 >>> worker_thread+0x8b/0xd20 kernel/workqueue.c:2410 >>> kthread+0x365/0x450 kernel/kthread.c:255 >>> ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 >>> >>> >>> Not sure if it's the same or not. Is it addressed by something in flight? >>> >>> My config: >>> https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt >> >> >> I've tried this fix for pcpu_get_vm_areas: >> https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ >> and it helps. But this will break syzbot on linux-next soon. > > > Can this be related as well? > Crashes on accesses to shadow on the ion memory... Nope, it's vm_map_ram() not being handled
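To make the gap concrete: vm_map_ram() hands out vmalloc-space addresses on two paths, and neither goes through the __vmalloc_node_range()/pcpu_get_vm_areas() call sites where the series populates shadow. A rough sketch of its shape in 5.4-next (simplified, not the verbatim mm/vmalloc.c source):

void *vm_map_ram(struct page **pages, unsigned int count, int node,
		 pgprot_t prot)
{
	unsigned long size = (unsigned long)count << PAGE_SHIFT;
	unsigned long addr;
	void *mem;

	if (likely(count <= VMAP_MAX_ALLOC)) {
		/* fast path: carve the range out of a per-CPU vmap block */
		mem = vb_alloc(size, GFP_KERNEL);
		if (IS_ERR(mem))
			return NULL;
		addr = (unsigned long)mem;
	} else {
		/* slow path: grab a vmap_area directly */
		struct vmap_area *va;

		va = alloc_vmap_area(size, PAGE_SIZE, VMALLOC_START,
				     VMALLOC_END, node, GFP_KERNEL);
		if (IS_ERR(va))
			return NULL;
		addr = va->va_start;
		mem = (void *)addr;
	}

	/* no kasan_populate_vmalloc() equivalent on either path */
	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
		vm_unmap_ram(mem, count);
		return NULL;
	}
	return mem;
}

So the first instrumented access to a vm_map_ram() buffer (the ion memset above, or the xfs superblock read below) faults on an unmapped shadow page.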
On Fri, Nov 29, 2019 at 12:38 PM Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> >>>
> >>>
> >>> Not sure if it's the same or not. Is it addressed by something in flight?
> >>>
> >>> My config:
> >>> https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt
> >>
> >>
> >> I've tried this fix for pcpu_get_vm_areas:
> >> https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ
> >> and it helps. But this will break syzbot on linux-next soon.
> >
> >
> > Can this be related as well?
> > Crashes on accesses to shadow on the ion memory...
>
> Nope, it's vm_map_ram() not being handled

Another suspicious one. Related to kasan/vmalloc?

BUG: unable to handle page fault for address: fffff52005b80000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 7ffcd067 P4D 7ffcd067 PUD 2cd10067 PMD 66d76067 PTE 0
Oops: 0000 [#1] PREEMPT SMP KASAN
CPU: 2 PID: 9211 Comm: syz-executor.2 Not tainted 5.4.0-next-20191129+ #6
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691
Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58
RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06
RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000
RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460
RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045
R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340
R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000
FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 xfs_buf_ioend+0x228/0xdc0 fs/xfs/xfs_buf.c:1162
 __xfs_buf_submit+0x38b/0xe50 fs/xfs/xfs_buf.c:1485
 xfs_buf_submit fs/xfs/xfs_buf.h:268 [inline]
 xfs_buf_read_uncached+0x15c/0x560 fs/xfs/xfs_buf.c:897
 xfs_readsb+0x2d0/0x540 fs/xfs/xfs_mount.c:298
 xfs_fc_fill_super+0x3e6/0x11f0 fs/xfs/xfs_super.c:1415
 get_tree_bdev+0x444/0x620 fs/super.c:1340
 xfs_fc_get_tree+0x1c/0x20 fs/xfs/xfs_super.c:1550
 vfs_get_tree+0x8e/0x300 fs/super.c:1545
 do_new_mount fs/namespace.c:2822 [inline]
 do_mount+0x152d/0x1b50 fs/namespace.c:3142
 ksys_mount+0x114/0x130 fs/namespace.c:3351
 __do_sys_mount fs/namespace.c:3365 [inline]
 __se_sys_mount fs/namespace.c:3362 [inline]
 __x64_sys_mount+0xbe/0x150 fs/namespace.c:3362
 do_syscall_64+0xfa/0x780 arch/x86/entry/common.c:294
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x46736a
Code: 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fb49bda8a78 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5
RAX: ffffffffffffffda RBX: 00007fb49bda8af0 RCX: 000000000046736a
RDX: 00007fb49bda8ad0 RSI: 0000000020000140 RDI: 00007fb49bda8af0
RBP: 00007fb49bda8ad0 R08: 00007fb49bda8b30 R09: 00007fb49bda8ad0
R10: 0000000000000000 R11: 0000000000000202 R12: 00007fb49bda8b30
R13: 00000000004b1c60 R14: 00000000004b006d R15: 00007fb49bda96bc
Modules linked in:
Dumping ftrace buffer:
 (ftrace buffer empty)
CR2: fffff52005b80000
---[ end trace eddd8949d4c898df ]---
RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691
Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58
RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06
RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000
RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460
RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045
R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340
R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000
FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
On 11/29/19 2:47 PM, Dmitry Vyukov wrote: > On Fri, Nov 29, 2019 at 12:38 PM Andrey Ryabinin > <aryabinin@virtuozzo.com> wrote: >>>>> >>>>> >>>>> Not sure if it's the same or not. Is it addressed by something in flight? >>>>> >>>>> My config: >>>>> https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt >>>> >>>> >>>> I've tried this fix for pcpu_get_vm_areas: >>>> https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ >>>> and it helps. But this will break syzbot on linux-next soon. >>> >>> >>> Can this be related as well? >>> Crashes on accesses to shadow on the ion memory... >> >> Nope, it's vm_map_ram() not being handled > > > Another suspicious one. Related to kasan/vmalloc? Very likely the same as with ion: # git grep vm_map_ram|grep xfs fs/xfs/xfs_buf.c: * vm_map_ram() will allocate auxiliary structures (e.g. fs/xfs/xfs_buf.c: bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, > > BUG: unable to handle page fault for address: fffff52005b80000 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > PGD 7ffcd067 P4D 7ffcd067 PUD 2cd10067 PMD 66d76067 PTE 0 > Oops: 0000 [#1] PREEMPT SMP KASAN > CPU: 2 PID: 9211 Comm: syz-executor.2 Not tainted 5.4.0-next-20191129+ #6 > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS > rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 > RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691 > Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 > 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 > 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58 > RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06 > RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000 > RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460 > RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045 > R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340 > R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000 > FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > PKRU: 55555554 > Call Trace: > xfs_buf_ioend+0x228/0xdc0 fs/xfs/xfs_buf.c:1162 > __xfs_buf_submit+0x38b/0xe50 fs/xfs/xfs_buf.c:1485 > xfs_buf_submit fs/xfs/xfs_buf.h:268 [inline] > xfs_buf_read_uncached+0x15c/0x560 fs/xfs/xfs_buf.c:897 > xfs_readsb+0x2d0/0x540 fs/xfs/xfs_mount.c:298 > xfs_fc_fill_super+0x3e6/0x11f0 fs/xfs/xfs_super.c:1415 > get_tree_bdev+0x444/0x620 fs/super.c:1340 > xfs_fc_get_tree+0x1c/0x20 fs/xfs/xfs_super.c:1550 > vfs_get_tree+0x8e/0x300 fs/super.c:1545 > do_new_mount fs/namespace.c:2822 [inline] > do_mount+0x152d/0x1b50 fs/namespace.c:3142 > ksys_mount+0x114/0x130 fs/namespace.c:3351 > __do_sys_mount fs/namespace.c:3365 [inline] > __se_sys_mount fs/namespace.c:3362 [inline] > __x64_sys_mount+0xbe/0x150 fs/namespace.c:3362 > do_syscall_64+0xfa/0x780 arch/x86/entry/common.c:294 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > RIP: 0033:0x46736a > Code: 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f > 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d > 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 > RSP: 002b:00007fb49bda8a78 EFLAGS: 
00000202 ORIG_RAX: 00000000000000a5 > RAX: ffffffffffffffda RBX: 00007fb49bda8af0 RCX: 000000000046736a > RDX: 00007fb49bda8ad0 RSI: 0000000020000140 RDI: 00007fb49bda8af0 > RBP: 00007fb49bda8ad0 R08: 00007fb49bda8b30 R09: 00007fb49bda8ad0 > R10: 0000000000000000 R11: 0000000000000202 R12: 00007fb49bda8b30 > R13: 00000000004b1c60 R14: 00000000004b006d R15: 00007fb49bda96bc > Modules linked in: > Dumping ftrace buffer: > (ftrace buffer empty) > CR2: fffff52005b80000 > ---[ end trace eddd8949d4c898df ]--- > RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691 > Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 > 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 > 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58 > RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06 > RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000 > RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460 > RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045 > R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340 > R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000 > FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 > PKRU: 55555554 >
Hi Dmitry, >> I am testing this support on next-20191129 and seeing the following warnings: >> >> BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 >> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1 >> 4 locks held by kworker/1:1/44: >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >> __write_once_size include/linux/compiler.h:247 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >> arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set >> include/asm-generic/atomic-instrumented.h:868 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >> atomic_long_set include/asm-generic/atomic-long.h:40 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data >> kernel/workqueue.c:615 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >> set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline] >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: >> process_one_work+0x88b/0x1750 kernel/workqueue.c:2235 >> #1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: >> process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239 >> #2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: >> pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845 >> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock >> include/linux/spinlock.h:338 [inline] >> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: >> pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 >> Preemption disabled at: >> [<ffffffff81a84199>] spin_lock include/linux/spinlock.h:338 [inline] >> [<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 >> CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5 >> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 >> Workqueue: events pcpu_balance_workfn >> Call Trace: >> __dump_stack lib/dump_stack.c:77 [inline] >> dump_stack+0x199/0x216 lib/dump_stack.c:118 >> ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 >> __might_sleep+0x95/0x190 kernel/sched/core.c:6753 >> prepare_alloc_pages mm/page_alloc.c:4681 [inline] >> __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730 >> alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 >> alloc_pages include/linux/gfp.h:532 [inline] >> __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 >> kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline] >> kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753 >> apply_to_pte_range mm/memory.c:2041 [inline] >> apply_to_pmd_range mm/memory.c:2068 [inline] >> apply_to_pud_range mm/memory.c:2088 [inline] >> apply_to_p4d_range mm/memory.c:2108 [inline] >> apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133 >> kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791 >> pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439 >> pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340 >> pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934 >> process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264 >> worker_thread+0x8b/0xd20 kernel/workqueue.c:2410 >> kthread+0x365/0x450 kernel/kthread.c:255 >> ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 >> >> >> Not sure if it's the same or not. Is it addressed by something in flight? It looks like this one is the same. 
There is a patch to fix it:
https://lore.kernel.org/linux-mm/20191120052719.7201-1-dja@axtens.net/

Andrew said he had picked it up on the 22nd:
https://marc.info/?l=linux-mm-commits&m=157438241512561&w=2

It's landed in mmots but not mmotm, so hopefully that will happen and then it will land in -next very soon!

I will look into your other bug report shortly.

Regards,
Daniel

>> My config:
>> https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt
>
> I've tried this fix for pcpu_get_vm_areas:
> https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ
> and it helps. But this will break syzbot on linux-next soon.
On Fri, Nov 29, 2019 at 1:09 PM Daniel Axtens <dja@axtens.net> wrote: > > Hi Dmitry, > > >> I am testing this support on next-20191129 and seeing the following warnings: > >> > >> BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 > >> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 44, name: kworker/1:1 > >> 4 locks held by kworker/1:1/44: > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > >> __write_once_size include/linux/compiler.h:247 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > >> arch_atomic64_set arch/x86/include/asm/atomic64_64.h:34 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: atomic64_set > >> include/asm-generic/atomic-instrumented.h:868 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > >> atomic_long_set include/asm-generic/atomic-long.h:40 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: set_work_data > >> kernel/workqueue.c:615 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > >> set_work_pool_and_clear_pending kernel/workqueue.c:642 [inline] > >> #0: ffff888067c26d28 ((wq_completion)events){+.+.}, at: > >> process_one_work+0x88b/0x1750 kernel/workqueue.c:2235 > >> #1: ffffc900002afdf0 (pcpu_balance_work){+.+.}, at: > >> process_one_work+0x8c0/0x1750 kernel/workqueue.c:2239 > >> #2: ffffffff8943f080 (pcpu_alloc_mutex){+.+.}, at: > >> pcpu_balance_workfn+0xcc/0x13e0 mm/percpu.c:1845 > >> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: spin_lock > >> include/linux/spinlock.h:338 [inline] > >> #3: ffffffff89450c78 (vmap_area_lock){+.+.}, at: > >> pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > >> Preemption disabled at: > >> [<ffffffff81a84199>] spin_lock include/linux/spinlock.h:338 [inline] > >> [<ffffffff81a84199>] pcpu_get_vm_areas+0x1449/0x3df0 mm/vmalloc.c:3431 > >> CPU: 1 PID: 44 Comm: kworker/1:1 Not tainted 5.4.0-next-20191129+ #5 > >> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-1 04/01/2014 > >> Workqueue: events pcpu_balance_workfn > >> Call Trace: > >> __dump_stack lib/dump_stack.c:77 [inline] > >> dump_stack+0x199/0x216 lib/dump_stack.c:118 > >> ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 > >> __might_sleep+0x95/0x190 kernel/sched/core.c:6753 > >> prepare_alloc_pages mm/page_alloc.c:4681 [inline] > >> __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730 > >> alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 > >> alloc_pages include/linux/gfp.h:532 [inline] > >> __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 > >> kasan_populate_vmalloc_pte mm/kasan/common.c:762 [inline] > >> kasan_populate_vmalloc_pte+0x2f/0x1b0 mm/kasan/common.c:753 > >> apply_to_pte_range mm/memory.c:2041 [inline] > >> apply_to_pmd_range mm/memory.c:2068 [inline] > >> apply_to_pud_range mm/memory.c:2088 [inline] > >> apply_to_p4d_range mm/memory.c:2108 [inline] > >> apply_to_page_range+0x5ca/0xa00 mm/memory.c:2133 > >> kasan_populate_vmalloc+0x69/0xa0 mm/kasan/common.c:791 > >> pcpu_get_vm_areas+0x1596/0x3df0 mm/vmalloc.c:3439 > >> pcpu_create_chunk+0x240/0x7f0 mm/percpu-vm.c:340 > >> pcpu_balance_workfn+0x1033/0x13e0 mm/percpu.c:1934 > >> process_one_work+0x9b5/0x1750 kernel/workqueue.c:2264 > >> worker_thread+0x8b/0xd20 kernel/workqueue.c:2410 > >> kthread+0x365/0x450 kernel/kthread.c:255 > >> ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 > >> > >> > >> Not sure if it's the same or not. Is it addressed by something in flight? > > It looks like this one is the same. 
> > There is a patch to fix it: > https://lore.kernel.org/linux-mm/20191120052719.7201-1-dja@axtens.net/ > > Andrew said he had picked it up on the 22nd: > https://marc.info/?l=linux-mm-commits&m=157438241512561&w=2 > It's landed in mmots but not mmotm, so hopefully that will happen and > then it will land in -next very soon! > > I will look into your other bug report shortly. Thanks for the quick responses, Andrey, Daniel. > Regards, > Daniel > > >> > >> My config: > >> https://gist.githubusercontent.com/dvyukov/36c7be311fdec9cd51c649f7c3cb2ddb/raw/39c6f864fdd0ffc53f0822b14c354a73c1695fa1/gistfile1.txt > > > > > > I've tried this fix for pcpu_get_vm_areas: > > https://groups.google.com/d/msg/kasan-dev/t_F2X1MWKwk/h152Z3q2AgAJ > > and it helps. But this will break syzbot on linux-next soon.
>>> Nope, it's vm_map_ram() not being handled >> >> Another suspicious one. Related to kasan/vmalloc? > > Very likely the same as with ion: > > # git grep vm_map_ram|grep xfs > fs/xfs/xfs_buf.c: * vm_map_ram() will allocate auxiliary structures (e.g. > fs/xfs/xfs_buf.c: bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, Aaargh, that's an embarrassing miss. It's a bit intricate because the kasan_populate_vmalloc function is currently set up to take a vm_struct, not a vmap_area, but I'll see if I can get something simple out this evening - I'm away for the first part of next week. Do you have to do anything interesting to get it to explode with xfs? Is it as simple as mounting a drive and doing some I/O? Or do you need to do something more involved? Regards, Daniel > >> >> BUG: unable to handle page fault for address: fffff52005b80000 >> #PF: supervisor read access in kernel mode >> #PF: error_code(0x0000) - not-present page >> PGD 7ffcd067 P4D 7ffcd067 PUD 2cd10067 PMD 66d76067 PTE 0 >> Oops: 0000 [#1] PREEMPT SMP KASAN >> CPU: 2 PID: 9211 Comm: syz-executor.2 Not tainted 5.4.0-next-20191129+ #6 >> Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS >> rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 >> RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691 >> Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 >> 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 >> 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58 >> RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06 >> RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000 >> RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460 >> RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045 >> R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340 >> R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000 >> FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000 >> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0 >> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 >> PKRU: 55555554 >> Call Trace: >> xfs_buf_ioend+0x228/0xdc0 fs/xfs/xfs_buf.c:1162 >> __xfs_buf_submit+0x38b/0xe50 fs/xfs/xfs_buf.c:1485 >> xfs_buf_submit fs/xfs/xfs_buf.h:268 [inline] >> xfs_buf_read_uncached+0x15c/0x560 fs/xfs/xfs_buf.c:897 >> xfs_readsb+0x2d0/0x540 fs/xfs/xfs_mount.c:298 >> xfs_fc_fill_super+0x3e6/0x11f0 fs/xfs/xfs_super.c:1415 >> get_tree_bdev+0x444/0x620 fs/super.c:1340 >> xfs_fc_get_tree+0x1c/0x20 fs/xfs/xfs_super.c:1550 >> vfs_get_tree+0x8e/0x300 fs/super.c:1545 >> do_new_mount fs/namespace.c:2822 [inline] >> do_mount+0x152d/0x1b50 fs/namespace.c:3142 >> ksys_mount+0x114/0x130 fs/namespace.c:3351 >> __do_sys_mount fs/namespace.c:3365 [inline] >> __se_sys_mount fs/namespace.c:3362 [inline] >> __x64_sys_mount+0xbe/0x150 fs/namespace.c:3362 >> do_syscall_64+0xfa/0x780 arch/x86/entry/common.c:294 >> entry_SYSCALL_64_after_hwframe+0x49/0xbe >> RIP: 0033:0x46736a >> Code: 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f >> 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d >> 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 >> RSP: 002b:00007fb49bda8a78 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 >> RAX: ffffffffffffffda RBX: 00007fb49bda8af0 RCX: 000000000046736a >> RDX: 00007fb49bda8ad0 RSI: 0000000020000140 RDI: 00007fb49bda8af0 >> RBP: 00007fb49bda8ad0 R08: 00007fb49bda8b30 R09: 00007fb49bda8ad0 >> R10: 0000000000000000 R11: 0000000000000202 R12: 00007fb49bda8b30 >> R13: 00000000004b1c60 R14: 00000000004b006d R15: 00007fb49bda96bc >> Modules linked in: >> Dumping ftrace buffer: >> (ftrace buffer empty) >> CR2: fffff52005b80000 >> ---[ end trace eddd8949d4c898df ]--- >> RIP: 0010:xfs_sb_read_verify+0xe9/0x540 fs/xfs/libxfs/xfs_sb.c:691 >> Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 1e 04 00 00 4d 8b ac 24 >> 30 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 ea 03 <0f> b6 >> 04 02 84 c0 74 08 3c 03 0f 8e ad 03 00 00 41 8b 45 00 bf 58 >> RSP: 0018:ffffc9000a58f8d0 EFLAGS: 00010a06 >> RAX: dffffc0000000000 RBX: 1ffff920014b1f1d RCX: ffffc9000af42000 >> RDX: 1ffff92005b80000 RSI: ffffffff82914404 RDI: ffff88805cdb1460 >> RBP: ffffc9000a58fab0 R08: ffff8880610cd380 R09: ffffed1005a87045 >> R10: ffffed1005a87044 R11: ffff88802d438223 R12: ffff88805cdb1340 >> R13: ffffc9002dc00000 R14: ffffc9000a58fa88 R15: ffff888061b5c000 >> FS: 00007fb49bda9700(0000) GS:ffff88802d400000(0000) knlGS:0000000000000000 >> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> CR2: fffff52005b80000 CR3: 0000000060769006 CR4: 0000000000760ee0 >> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 >> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 >> PKRU: 55555554
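To make the miss concrete: vm_map_ram() builds its mapping from a vmap_area (or a vmap block) and never creates a vm_struct, so the kasan_populate_vmalloc() hooks in __get_vm_area_node() and __vmalloc_node_range() (see the patch at the end of this thread) are never reached, and the shadow for the mapped range stays unmapped. A minimal sketch of the faulting pattern, assuming the v5.4-era four-argument vm_map_ram() signature:

    #include <linux/mm.h>
    #include <linux/string.h>
    #include <linux/vmalloc.h>

    /*
     * xfs_buf-style usage: the mapping comes from the vmap path with no
     * vm_struct, hence no shadow is populated for it.
     */
    static void touch_vm_map_ram(struct page **pages, unsigned int nr)
    {
            void *addr = vm_map_ram(pages, nr, -1, PAGE_KERNEL);

            if (!addr)
                    return;
            /* instrumented access: the shadow lookup faults, as in the oops above */
            memset(addr, 0, nr * PAGE_SIZE);
            vm_unmap_ram(addr, nr);
    }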
On Fri, Nov 29, 2019 at 1:29 PM Daniel Axtens <dja@axtens.net> wrote: > >>> Nope, it's vm_map_ram() not being handled > >> Another suspicious one. Related to kasan/vmalloc? > > Very likely the same as with ion: > > > > # git grep vm_map_ram|grep xfs > > fs/xfs/xfs_buf.c: * vm_map_ram() will allocate auxiliary structures (e.g. > > fs/xfs/xfs_buf.c: bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, > > Aaargh, that's an embarrassing miss. > > It's a bit intricate because the kasan_populate_vmalloc function is > currently set up to take a vm_struct, not a vmap_area, but I'll see if I > can get something simple out this evening - I'm away for the first part > of next week. > > Do you have to do anything interesting to get it to explode with xfs? Is > it as simple as mounting a drive and doing some I/O? Or do you need to > do something more involved? As simple as running syzkaller :) with this config https://github.com/google/syzkaller/blob/master/dashboard/config/upstream-kasan.config > Regards, > Daniel > >> [snip: the same xfs_sb_read_verify oops quoted in full above]
On Fri, Nov 29, 2019 at 1:45 PM Dmitry Vyukov <dvyukov@google.com> wrote: > > On Fri, Nov 29, 2019 at 1:29 PM Daniel Axtens <dja@axtens.net> wrote: > > [snip] > > Do you have to do anything interesting to get it to explode with xfs? Is > > it as simple as mounting a drive and doing some I/O? Or do you need to > > do something more involved? > > As simple as running syzkaller :) > with this config > https://github.com/google/syzkaller/blob/master/dashboard/config/upstream-kasan.config > > [snip: quoted oops trimmed; see the full report earlier in the thread] Another one that looks related: BUG: sleeping function called from invalid context at mm/page_alloc.c:4681 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 15087, name: syz-executor.7 3 locks held by syz-executor.7/15087: #0: ffff888024542110 (sk_lock-AF_PACKET){+.+.}, at: lock_sock include/net/sock.h:1526 [inline] #0: ffff888024542110 (sk_lock-AF_PACKET){+.+.}, at: packet_setsockopt+0xdf1/0x2d90 net/packet/af_packet.c:3678 #1: ffffffff89850a80 (vmap_purge_lock){+.+.}, at: try_purge_vmap_area_lazy mm/vmalloc.c:1331 [inline] #1: ffffffff89850a80 (vmap_purge_lock){+.+.}, at: free_vmap_area_noflush+0x2a8/0x390 mm/vmalloc.c:1368 #2: ffffffff89850c18 (free_vmap_area_lock){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline] #2: ffffffff89850c18 (free_vmap_area_lock){+.+.}, at: __purge_vmap_area_lazy+0x19c/0x1f30 mm/vmalloc.c:1298 Preemption disabled at: [<ffffffff81a78ddc>] spin_lock include/linux/spinlock.h:338 [inline] [<ffffffff81a78ddc>] __purge_vmap_area_lazy+0x19c/0x1f30 mm/vmalloc.c:1298 CPU: 3 PID: 15087 Comm: syz-executor.7 Not tainted 5.4.0-next-20191129+ #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x199/0x216 lib/dump_stack.c:118 ___might_sleep.cold.97+0x1f5/0x238 kernel/sched/core.c:6800 __might_sleep+0x95/0x190 kernel/sched/core.c:6753 prepare_alloc_pages mm/page_alloc.c:4681 [inline] __alloc_pages_nodemask+0x3cd/0x890 mm/page_alloc.c:4730
alloc_pages_current+0x10c/0x210 mm/mempolicy.c:2211 alloc_pages include/linux/gfp.h:532 [inline] __get_free_pages+0xc/0x40 mm/page_alloc.c:4786 __pte_alloc_one_kernel include/asm-generic/pgalloc.h:21 [inline] pte_alloc_one_kernel include/asm-generic/pgalloc.h:33 [inline] __pte_alloc_kernel+0x1d/0x200 mm/memory.c:459 apply_to_pte_range mm/memory.c:2031 [inline] apply_to_pmd_range mm/memory.c:2068 [inline] apply_to_pud_range mm/memory.c:2088 [inline] apply_to_p4d_range mm/memory.c:2108 [inline] apply_to_page_range+0x77d/0xa00 mm/memory.c:2133 kasan_release_vmalloc+0xa7/0xc0 mm/kasan/common.c:970 __purge_vmap_area_lazy+0xcbb/0x1f30 mm/vmalloc.c:1313 try_purge_vmap_area_lazy mm/vmalloc.c:1332 [inline] free_vmap_area_noflush+0x2ca/0x390 mm/vmalloc.c:1368 free_unmap_vmap_area mm/vmalloc.c:1381 [inline] remove_vm_area+0x1cc/0x230 mm/vmalloc.c:2209 vm_remove_mappings mm/vmalloc.c:2236 [inline] __vunmap+0x223/0xa20 mm/vmalloc.c:2299 __vfree+0x3f/0xd0 mm/vmalloc.c:2356 __vmalloc_area_node mm/vmalloc.c:2507 [inline] __vmalloc_node_range+0x5d5/0x810 mm/vmalloc.c:2547 __vmalloc_node mm/vmalloc.c:2607 [inline] __vmalloc_node_flags mm/vmalloc.c:2621 [inline] vzalloc+0x6f/0x80 mm/vmalloc.c:2666 alloc_one_pg_vec_page net/packet/af_packet.c:4233 [inline] alloc_pg_vec net/packet/af_packet.c:4258 [inline] packet_set_ring+0xbc0/0x1b50 net/packet/af_packet.c:4342 packet_setsockopt+0xed7/0x2d90 net/packet/af_packet.c:3695 __sys_setsockopt+0x29b/0x4d0 net/socket.c:2117 __do_sys_setsockopt net/socket.c:2133 [inline] __se_sys_setsockopt net/socket.c:2130 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:2130 do_syscall_64+0xfa/0x780 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x465fe9 Code: Bad RIP value. RSP: 002b:00007ff70087dc68 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 000000000052bf00 RCX: 0000000000465fe9 RDX: 0000000000000005 RSI: 0000000000000107 RDI: 0000000000000004 RBP: 00000000ffffffff R08: 000000000000001c R09: 0000000000000000 R10: 0000000020000040 R11: 0000000000000246 R12: 00000000004a643a R13: 00000000004f2620 R14: 00000000004af7e6 R15: 00007ff70087e6bc BUG: scheduling while atomic: syz-executor.7/15087/0x00000002 3 locks held by syz-executor.7/15087: #0: ffff888024542110 (sk_lock-AF_PACKET){+.+.}, at: lock_sock include/net/sock.h:1526 [inline] #0: ffff888024542110 (sk_lock-AF_PACKET){+.+.}, at: packet_setsockopt+0xdf1/0x2d90 net/packet/af_packet.c:3678 #1: ffffffff89850a80 (vmap_purge_lock){+.+.}, at: try_purge_vmap_area_lazy mm/vmalloc.c:1331 [inline] #1: ffffffff89850a80 (vmap_purge_lock){+.+.}, at: free_vmap_area_noflush+0x2a8/0x390 mm/vmalloc.c:1368 #2: ffffffff89850c18 (free_vmap_area_lock){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline] #2: ffffffff89850c18 (free_vmap_area_lock){+.+.}, at: __purge_vmap_area_lazy+0x19c/0x1f30 mm/vmalloc.c:1298 Modules linked in: Preemption disabled at: [<ffffffff81a78ddc>] spin_lock include/linux/spinlock.h:338 [inline] [<ffffffff81a78ddc>] __purge_vmap_area_lazy+0x19c/0x1f30 mm/vmalloc.c:1298
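Reading that splat: it is the same sleeping-under-spinlock pattern as the percpu report, but on the release side. kasan_release_vmalloc() is called under free_vmap_area_lock, and apply_to_page_range() will allocate intermediate page tables with GFP_KERNEL for any hole it walks across. The chain, condensed from the trace:

    __purge_vmap_area_lazy()        /* takes free_vmap_area_lock */
      kasan_release_vmalloc()
        apply_to_page_range()
          __pte_alloc_kernel()      /* GFP_KERNEL allocation: may sleep */

One conceivable fix shape, sketched under the assumption that the release path only ever needs to clear PTEs that already exist; the helper name here is invented for illustration and is not in the tree at this point:

    /*
     * Hypothetical: like apply_to_page_range(), but skip unpopulated
     * ranges instead of allocating page tables for them, so that
     * nothing can sleep under free_vmap_area_lock.
     */
    static void kasan_release_vmalloc_shadow(unsigned long shadow_start,
                                             unsigned long shadow_end)
    {
            apply_to_existing_page_range(&init_mm, shadow_start,
                                         shadow_end - shadow_start,
                                         kasan_depopulate_vmalloc_pte, NULL);
    }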
> On Nov 29, 2019, at 7:29 AM, Daniel Axtens <dja@axtens.net> wrote: > >>>> >>>> Nope, it's vm_map_ram() not being handled >>> >>> >>> Another suspicious one. Related to kasan/vmalloc? >> >> Very likely the same as with ion: >> >> # git grep vm_map_ram|grep xfs >> fs/xfs/xfs_buf.c: * vm_map_ram() will allocate auxiliary structures (e.g. >> fs/xfs/xfs_buf.c: bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, > > Aaargh, that's an embarrassing miss. > > It's a bit intricate because the kasan_populate_vmalloc function is > currently set up to take a vm_struct, not a vmap_area, but I'll see if I > can get something simple out this evening - I'm away for the first part > of next week. > > Do you have to do anything interesting to get it to explode with xfs? Is > it as simple as mounting a drive and doing some I/O? Or do you need to > do something more involved? I instead trigger something a bit different by manually triggering a crash first, so that the XFS partition is uncleanly shut down. # echo c >/proc/sysrq-trigger and then reboot into the same kernel, where it will crash while checking the XFS. This can be worked around by first rebooting into an older kernel (v4.18), where xfs_repair runs successfully, and then rebooting into the new linux-next kernel will be fine. [ OK ] Started File System Check on /dev/mapper/rhel_hpe--sy680gen9--01-root. Mounting /sysroot... [ 141.177726][ T1730] SGI XFS with security attributes, no debug enabled [ 141.432382][ T1720] XFS (dm-0): Mounting V5 Filesystem [** ] A start job is running for /sysroot (39s / 1min 51s)[ 158.738816][ T1720] XFS (dm-0): Starting recovery (logdev: internal) [ 158.792010][ T844] BUG: unable to handle page fault for address: fffff52001f0000c [ 158.830913][ T844] #PF: supervisor read access in kernel mode [ 158.859680][ T844] #PF: error_code(0x0000) - not-present page [ 158.886057][ T844] PGD 207ffe3067 P4D 207ffe3067 PUD 2071f2067 PMD f68e08067 PTE 0 [ 158.922065][ T844] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 158.949620][ T844] CPU: 112 PID: 844 Comm: kworker/112:1 Not tainted 5.4.0-next-20191127+ #3 [ 158.988759][ T844] Hardware name: HP Synergy 680 Gen9/Synergy 680 Gen9 Compute Module, BIOS I40 05/23/2018 [ 159.033380][ T844] Workqueue: xfs-buf/dm-0 xfs_buf_ioend_work [xfs] [ 159.061935][ T844] RIP: 0010:__asan_load4+0x3a/0xa0 [ 159.061941][ T844] Code: 00 00 00 00 00 00 ff 48 39 f8 77 6d 48 8d 47 03 48 89 c2 83 e2 07 48 83 fa 02 76 30 48 be 00 00 00 00 00 fc ff df 48 c1 e8 03 <0f> b6 04 30 84 c0 75 3e 5d c3 48 b8 00 00 00 00 00 80 ff ff eb c7 [ 159.061944][ T844] RSP: 0018:ffffc9000a4b7cb0 EFLAGS: 00010a06 [ 159.061949][ T844] RAX: 1ffff92001f0000c RBX: ffffc9000f800000 RCX: ffffffffc06d10ae [ 159.061952][ T844] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffc9000f800060 [ 159.061955][ T844] RBP: ffffc9000a4b7cb0 R08: ffffed130bee89e5 R09: 0000000000000001 [ 159.061958][ T844] R10: ffffed130bee89e4 R11: ffff88985f744f23 R12: 0000000000000000 [ 159.061961][ T844] R13: ffff889724be0040 R14: ffff88836c8e5000 R15: 00000000000c8000 [ 159.061965][ T844] FS: 0000000000000000(0000) GS:ffff88985f700000(0000) knlGS:0000000000000000 [ 159.061968][ T844] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 159.061971][ T844] CR2: fffff52001f0000c CR3: 0000001f615b8004 CR4: 00000000003606e0 [ 159.061974][ T844] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 159.061976][ T844] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 159.061978][ T844] Call Trace: [ 159.062118][ T844]
xfs_inode_buf_verify+0x13e/0x230 [xfs] [ 159.062264][ T844] xfs_inode_buf_readahead_verify+0x13/0x20 [xfs] [ 159.634441][ T844] xfs_buf_ioend+0x153/0x6b0 [xfs] [ 159.634455][ T844] ? trace_hardirqs_on+0x3a/0x160 [ 159.679087][ T844] xfs_buf_ioend_work+0x15/0x20 [xfs] [ 159.702689][ T844] process_one_work+0x579/0xb90 [ 159.723898][ T844] ? pwq_dec_nr_in_flight+0x170/0x170 [ 159.747499][ T844] worker_thread+0x63/0x5b0 [ 159.767531][ T844] ? process_one_work+0xb90/0xb90 [ 159.789549][ T844] kthread+0x1e6/0x210 [ 159.807166][ T844] ? kthread_create_worker_on_cpu+0xc0/0xc0 [ 159.833064][ T844] ret_from_fork+0x3a/0x50 [ 159.852200][ T844] Modules linked in: xfs sd_mod bnx2x mdio firmware_class hpsa scsi_transport_sas dm_mirror dm_region_hash dm_log dm_mod [ 159.915273][ T844] CR2: fffff52001f0000c [ 159.934029][ T844] ---[ end trace 3f3b30f5fc34bbf1 ]--- [ 159.957937][ T844] RIP: 0010:__asan_load4+0x3a/0xa0 [ 159.980316][ T844] Code: 00 00 00 00 00 00 ff 48 39 f8 77 6d 48 8d 47 03 48 89 c2 83 e2 07 48 83 fa 02 76 30 48 be 00 00 00 00 00 fc ff df 48 c1 e8 03 <0f> b6 04 30 84 c0 75 3e 5d c3 48 b8 00 00 00 00 00 80 ff ff eb c7 [ 160.068386][ T844] RSP: 0018:ffffc9000a4b7cb0 EFLAGS: 00010a06 [ 160.068389][ T844] RAX: 1ffff92001f0000c RBX: ffffc9000f800000 RCX: ffffffffc06d10ae [ 160.068391][ T844] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffc9000f800060 [ 160.068393][ T844] RBP: ffffc9000a4b7cb0 R08: ffffed130bee89e5 R09: 0000000000000001 [ 160.068395][ T844] R10: ffffed130bee89e4 R11: ffff88985f744f23 R12: 0000000000000000 [ 160.068397][ T844] R13: ffff889724be0040 R14: ffff88836c8e5000 R15: 00000000000c8000 [ 160.068399][ T844] FS: 0000000000000000(0000) GS:ffff88985f700000(0000) knlGS:0000000000000000 [ 160.068401][ T844] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 160.068404][ T844] CR2: fffff52001f0000c CR3: 0000001f615b8004 CR4: 00000000003606e0 [ 160.068405][ T844] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 160.068407][ T844] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 160.068410][ T844] Kernel panic - not syncing: Fatal exception [ 160.095178][ T844] Kernel Offset: 0x21c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 160.541027][ T844] ---[ end Kernel panic - not syncing: Fatal exception ]--- > > Regards, > Daniel > >> [snip: the same xfs_sb_read_verify oops already quoted in full earlier in the thread]
>>>>> >>>>> Nope, it's vm_map_ram() not being handled >>>> >>>> >>>> Another suspicious one. Related to kasan/vmalloc? >>> >>> Very likely the same as with ion: >>> >>> # git grep vm_map_ram|grep xfs >>> fs/xfs/xfs_buf.c: * vm_map_ram() will allocate auxiliary structures (e.g. >>> fs/xfs/xfs_buf.c: bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, >> >> Aaargh, that's an embarrassing miss. >> >> It's a bit intricate because the kasan_populate_vmalloc function is >> currently set up to take a vm_struct, not a vmap_area, but I'll see if I >> can get something simple out this evening - I'm away for the first part >> of next week. For crashes in XFS, binder etc that implicate vm_map_ram, see: https://lore.kernel.org/linux-mm/20191129154519.30964-1-dja@axtens.net/ The easiest way I found to repro the bug is sudo modprobe i915 mock_selftest=-1 For the lock warnings: the one that goes through the percpu alloc path is addressed by the patch already queued in mmots. For Dmitry's latest one, where there's an allocation in the purge_vmap_area_lazy path that triggers a locking warning, you'll have to wait until next week, sorry. Regards, Daniel
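The linked series addresses the point raised above about kasan_populate_vmalloc() taking a vm_struct: to cover vm_map_ram() and friends, the hook has to be keyed on an address range rather than on a vm_struct. A sketch of that direction (the signature below is illustrative only; see the linked patches for the real interface):

    /* hypothetical range-based hook, usable by callers with no vm_struct */
    int kasan_populate_vmalloc_range(unsigned long addr, unsigned long size);

    /* a vm_map_ram()-style caller could then do: */
    if (kasan_populate_vmalloc_range(addr, size)) {
            vm_unmap_ram((void *)addr, size >> PAGE_SHIFT);
            return NULL;    /* fail the mapping if shadow cannot be allocated */
    }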
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 525296121d89..e4d66e7c50de 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -218,3 +218,66 @@ brk handler is used to print bug reports. A potential expansion of this mode is a hardware tag-based mode, which would use hardware memory tagging support instead of compiler instrumentation and manual shadow memory manipulation. + +What memory accesses are sanitised by KASAN? +-------------------------------------------- + +The kernel maps memory in a number of different parts of the address +space. This poses something of a problem for KASAN, which requires +that all addresses accessed by instrumented code have a valid shadow +region. + +The range of kernel virtual addresses is large: there is not enough +real memory to support a real shadow region for every address that +could be accessed by the kernel. + +By default +~~~~~~~~~~ + +By default, architectures only map real memory over the shadow region +for the linear mapping (and potentially other small areas). For all +other areas - such as vmalloc and vmemmap space - a single read-only +page is mapped over the shadow area. This read-only shadow page +declares all memory accesses as permitted. + +This presents a problem for modules: they do not live in the linear +mapping, but in a dedicated module space. By hooking into the module +allocator, KASAN can temporarily map real shadow memory to cover +them. This allows detection of invalid accesses to module globals, for +example. + +This also creates an incompatibility with ``VMAP_STACK``: if the stack +lives in vmalloc space, it will be shadowed by the read-only page, and +the kernel will fault when trying to set up the shadow data for stack +variables. + +CONFIG_KASAN_VMALLOC +~~~~~~~~~~~~~~~~~~~~ + +With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the +cost of greater memory usage. Currently this is only supported on x86. + +This works by hooking into vmalloc and vmap, and dynamically +allocating real shadow memory to back the mappings. + +Most mappings in vmalloc space are small, requiring less than a full +page of shadow space. Allocating a full shadow page per mapping would +therefore be wasteful. Furthermore, to ensure that different mappings +use different shadow pages, mappings would have to be aligned to +``KASAN_SHADOW_SCALE_SIZE * PAGE_SIZE``. + +Instead, we share backing space across multiple mappings. We allocate +a backing page when a mapping in vmalloc space uses a particular page +of the shadow region. This page can be shared by other vmalloc +mappings later on. + +We hook into the vmap infrastructure to lazily clean up unused shadow +memory. + +To avoid the difficulties around swapping mappings around, we expect +that the part of the shadow region that covers the vmalloc space will +not be covered by the early shadow page, but will be left +unmapped. This will require changes in arch-specific code. + +This allows ``VMAP_STACK`` support on x86, and can simplify support of +architectures that do not have a fixed module region. diff --git a/include/linux/kasan.h b/include/linux/kasan.h index cc8a03cc9674..4f404c565db1 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -70,8 +70,18 @@ struct kasan_cache { int free_meta_offset; }; +/* + * These functions provide a special case to support backing module + * allocations with real shadow memory.
With KASAN vmalloc, the special + * case is unnecessary, as the work is handled in the generic case. + */ +#ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +#else +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_free_shadow(const struct vm_struct *vm) {} +#endif int kasan_add_zero_shadow(void *start, unsigned long size); void kasan_remove_zero_shadow(void *start, unsigned long size); @@ -194,4 +204,25 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS */ +#ifdef CONFIG_KASAN_VMALLOC +int kasan_populate_vmalloc(unsigned long requested_size, + struct vm_struct *area); +void kasan_poison_vmalloc(void *start, unsigned long size); +void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end); +#else +static inline int kasan_populate_vmalloc(unsigned long requested_size, + struct vm_struct *area) +{ + return 0; +} + +static inline void kasan_poison_vmalloc(void *start, unsigned long size) {} +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) {} +#endif + #endif /* LINUX_KASAN_H */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 5229c18025e9..ca92aea8a6bd 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -91,7 +91,7 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); -#ifdef CONFIG_KASAN +#if defined(CONFIG_KASAN) && !defined(CONFIG_KASAN_VMALLOC) #include <linux/kasan.h> #define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) #else diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4e7809408073..61c43d1a29ca 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -22,6 +22,18 @@ struct notifier_block; /* in notifier.h */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ + +/* + * VM_KASAN is used slightly differently depending on CONFIG_KASAN_VMALLOC. + * + * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after + * shadow memory has been mapped. It's used to handle allocation errors so that + * we don't try to poison shadow on free if it was never allocated. + * + * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to + * determine which allocations need the module shadow freed. + */ + /* * Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with * vfree_atomic(). diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 6c9682ce0254..81f5464ea9e1 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,9 @@ config HAVE_ARCH_KASAN config HAVE_ARCH_KASAN_SW_TAGS bool +config HAVE_ARCH_KASAN_VMALLOC + bool + config CC_HAS_KASAN_GENERIC def_bool $(cc-option, -fsanitize=kernel-address) @@ -142,6 +145,19 @@ config KASAN_SW_TAGS_IDENTIFY (use-after-free or out-of-bounds) at the cost of increased memory consumption. +config KASAN_VMALLOC + bool "Back mappings in vmalloc space with real shadow memory" + depends on KASAN && HAVE_ARCH_KASAN_VMALLOC + help + By default, the shadow region for vmalloc space is the read-only + zero page.
This means that KASAN cannot detect errors involving + vmalloc space. + + Enabling this option will hook into vmap/vmalloc and back those + mappings with real shadow memory allocated on demand. This allows + for KASAN to detect more sorts of errors (and to support vmapped + stacks), but at the cost of higher memory usage. + config TEST_KASAN tristate "Module for testing KASAN for bug detection" depends on m && KASAN diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 6814d6d6a023..6e7bc5d3fa83 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,6 +36,8 @@ #include <linux/bug.h> #include <linux/uaccess.h> +#include <asm/tlbflush.h> + #include "kasan.h" #include "../slab.h" @@ -590,6 +592,7 @@ void kasan_kfree_large(void *ptr, unsigned long ip) /* The object will be poisoned by page_alloc. */ } +#ifndef CONFIG_KASAN_VMALLOC int kasan_module_alloc(void *addr, size_t size) { void *ret; @@ -625,6 +628,7 @@ void kasan_free_shadow(const struct vm_struct *vm) if (vm->flags & VM_KASAN) vfree(kasan_mem_to_shadow(vm->addr)); } +#endif extern void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); @@ -744,3 +748,230 @@ static int __init kasan_memhotplug_init(void) core_initcall(kasan_memhotplug_init); #endif + +#ifdef CONFIG_KASAN_VMALLOC +static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + pte_t pte; + + if (likely(!pte_none(*ptep))) + return 0; + + page = __get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); + pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(*ptep))) { + set_pte_at(&init_mm, addr, ptep, pte); + page = 0; + } + spin_unlock(&init_mm.page_table_lock); + if (page) + free_page(page); + return 0; +} + +int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area) +{ + unsigned long shadow_start, shadow_end; + int ret; + + shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr); + shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr + + area->size); + shadow_end = ALIGN(shadow_end, PAGE_SIZE); + + ret = apply_to_page_range(&init_mm, shadow_start, + shadow_end - shadow_start, + kasan_populate_vmalloc_pte, NULL); + if (ret) + return ret; + + kasan_unpoison_shadow(area->addr, requested_size); + + area->flags |= VM_KASAN; + + /* + * We need to be careful about inter-cpu effects here. Consider: + * + * CPU#0 CPU#1 + * WRITE_ONCE(p, vmalloc(100)); while (x = READ_ONCE(p)) ; + * p[99] = 1; + * + * With compiler instrumentation, that ends up looking like this: + * + * CPU#0 CPU#1 + * // vmalloc() allocates memory + * // let a = area->addr + * // we reach kasan_populate_vmalloc + * // and call kasan_unpoison_shadow: + * STORE shadow(a), unpoison_val + * ... + * STORE shadow(a+99), unpoison_val x = LOAD p + * // rest of vmalloc process <data dependency> + * STORE p, a LOAD shadow(x+99) + * + * If there is no barrier between the end of unpoisoning the shadow + * and the store of the result to p, the stores could be committed + * in a different order by CPU#0, and CPU#1 could erroneously observe + * poison in the shadow. + * + * We need some sort of barrier between the stores. + * + * In the vmalloc() case, this is provided by a smp_wmb() in + * clear_vm_uninitialized_flag().
In the per-cpu allocator and in + * get_vm_area() and friends, the caller gets shadow allocated but + * doesn't have any pages mapped into the virtual address space that + * has been reserved. Mapping those pages in will involve taking and + * releasing a page-table lock, which will provide the barrier. + */ + + return 0; +} + +/* + * Poison the shadow for a vmalloc region. Called as part of the + * freeing process at the time the region is freed. + */ +void kasan_poison_vmalloc(void *start, unsigned long size) +{ + size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID); +} + +static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, + void *unused) +{ + unsigned long page; + + page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT); + + spin_lock(&init_mm.page_table_lock); + + if (likely(!pte_none(*ptep))) { + pte_clear(&init_mm, addr, ptep); + free_page(page); + } + spin_unlock(&init_mm.page_table_lock); + + return 0; +} + +/* + * Release the backing for the vmalloc region [start, end), which + * lies within the free region [free_region_start, free_region_end). + * + * This can be run lazily, long after the region was freed. It runs + * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap + * infrastructure. + * + * How does this work? + * ------------------- + * + * We have a region that is page aligned, labelled as A. + * That might not map onto the shadow in a way that is page-aligned: + * + * start end + * v v + * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |??AAAAAA|AAAAAAAA|AA??????| < shadow + * (1) (2) (3) + * + * First we align the start upwards and the end downwards, so that the + * shadow of the region aligns with shadow page boundaries. In the + * example, this gives us the shadow page (2). This is the shadow entirely + * covered by this allocation. + * + * Then we have the tricky bits. We want to know if we can free the + * partially covered shadow pages - (1) and (3) in the example. For this, + * we are given the start and end of the free region that contains this + * allocation. Extending our previous example, we could have: + * + * free_region_start free_region_end + * | start end | + * v v v v + * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc + * -------- -------- -------- -------- -------- + * | | | | | + * | | | /-------/ | + * \-------\|/------/ |/---------------/ + * ||| || + * |FFAAAAAA|AAAAAAAA|AAF?????| < shadow + * (1) (2) (3) + * + * Once again, we align the start of the free region up, and the end of + * the free region down so that the shadow is page aligned. So we can free + * page (1) - we know no allocation currently uses anything in that page, + * because all of it is in the vmalloc free region. But we cannot free + * page (3), because we can't be sure that the rest of it is unused. + * + * We only consider pages that contain part of the original region for + * freeing: we don't try to free other pages from the free region or we'd + * end up trying to free huge chunks of virtual address space. + * + * Concurrency + * ----------- + * + * How do we know that we're not freeing a page that is simultaneously + * being used for a fresh allocation in kasan_populate_vmalloc(_pte)? + * + * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running + * at the same time. 
While we run under free_vmap_area_lock, the population + * code does not. + * + * free_vmap_area_lock instead operates to ensure that the larger range + * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and + * the per-cpu region-finding algorithm both run under free_vmap_area_lock, + * no space identified as free will become used while we are running. This + * means that so long as we are careful with alignment and only free shadow + * pages entirely covered by the free region, we will not run into any + * trouble - any simultaneous allocations will be for disjoint regions. + */ +void kasan_release_vmalloc(unsigned long start, unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) +{ + void *shadow_start, *shadow_end; + unsigned long region_start, region_end; + + region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + free_region_start = ALIGN(free_region_start, + PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + if (start != region_start && + free_region_start < region_start) + region_start -= PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + + free_region_end = ALIGN_DOWN(free_region_end, + PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE); + + if (end != region_end && + free_region_end > region_end) + region_end += PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE; + + shadow_start = kasan_mem_to_shadow((void *)region_start); + shadow_end = kasan_mem_to_shadow((void *)region_end); + + if (shadow_end > shadow_start) { + apply_to_page_range(&init_mm, (unsigned long)shadow_start, + (unsigned long)(shadow_end - shadow_start), + kasan_depopulate_vmalloc_pte, NULL); + flush_tlb_kernel_range((unsigned long)shadow_start, + (unsigned long)shadow_end); + } +} +#endif diff --git a/mm/kasan/generic_report.c b/mm/kasan/generic_report.c index 36c645939bc9..2d97efd4954f 100644 --- a/mm/kasan/generic_report.c +++ b/mm/kasan/generic_report.c @@ -86,6 +86,9 @@ static const char *get_shadow_bug_type(struct kasan_access_info *info) case KASAN_ALLOCA_RIGHT: bug_type = "alloca-out-of-bounds"; break; + case KASAN_VMALLOC_INVALID: + bug_type = "vmalloc-out-of-bounds"; + break; } return bug_type; diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 35cff6bbb716..3a083274628e 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -25,6 +25,7 @@ #endif #define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */ +#define KASAN_VMALLOC_INVALID 0xF9 /* unallocated space in vmapped page */ /* * Stack redzone shadow values diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f48f64c8d200..72d0aa039e68 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -683,7 +683,7 @@ insert_vmap_area_augment(struct vmap_area *va, * free area is inserted. If VA has been merged, it is * freed. */ -static __always_inline void +static __always_inline struct vmap_area * merge_or_add_vmap_area(struct vmap_area *va, struct rb_root *root, struct list_head *head) { @@ -750,7 +750,10 @@ merge_or_add_vmap_area(struct vmap_area *va, /* Free vmap_area object. */ kmem_cache_free(vmap_area_cachep, va); - return; + + /* Point to the new merged area. */ + va = sibling; + merged = true; } } @@ -759,6 +762,8 @@ merge_or_add_vmap_area(struct vmap_area *va, link_va(va, root, parent, link, head); augment_tree_propagate_from(va); } + + return va; } static __always_inline bool @@ -1196,8 +1201,7 @@ static void free_vmap_area(struct vmap_area *va) * Insert/Merge it back to the free tree/list.
*/ spin_lock(&free_vmap_area_lock); - merge_or_add_vmap_area(va, - &free_vmap_area_root, &free_vmap_area_list); + merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list); spin_unlock(&free_vmap_area_lock); } @@ -1294,14 +1298,20 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) spin_lock(&free_vmap_area_lock); llist_for_each_entry_safe(va, n_va, valist, purge_list) { unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT; + unsigned long orig_start = va->va_start; + unsigned long orig_end = va->va_end; /* * Finally insert or merge lazily-freed area. It is * detached and there is no need to "unlink" it from * anything. */ - merge_or_add_vmap_area(va, - &free_vmap_area_root, &free_vmap_area_list); + va = merge_or_add_vmap_area(va, &free_vmap_area_root, + &free_vmap_area_list); + + if (is_vmalloc_or_module_addr((void *)orig_start)) + kasan_release_vmalloc(orig_start, orig_end, + va->va_start, va->va_end); atomic_long_sub(nr, &vmap_lazy_nr); @@ -2090,6 +2100,22 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, setup_vmalloc_vm(area, va, flags, caller); + /* + * For KASAN, if we are in vmalloc space, we need to cover the shadow + * area with real memory. If we come here through VM_ALLOC, this is + * done by a higher level function that has access to the true size, + * which might not be a full page. + * + * We assume module space comes via VM_ALLOC path. + */ + if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) { + if (kasan_populate_vmalloc(area->size, area)) { + unmap_vmap_area(va); + kfree(area); + return NULL; + } + } + return area; } @@ -2267,6 +2293,9 @@ static void __vunmap(const void *addr, int deallocate_pages) debug_check_no_locks_freed(area->addr, get_vm_area_size(area)); debug_check_no_obj_freed(area->addr, get_vm_area_size(area)); + if (area->flags & VM_KASAN) + kasan_poison_vmalloc(area->addr, area->size); + vm_remove_mappings(area, deallocate_pages); if (deallocate_pages) { @@ -2519,6 +2548,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!addr) return NULL; + if (is_vmalloc_or_module_addr(area->addr)) { + if (kasan_populate_vmalloc(real_size, area)) + return NULL; + } + /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. @@ -3377,6 +3411,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, pcpu_get_vm_areas); + + /* assume success here */ + kasan_populate_vmalloc(sizes[area], vms[area]); } spin_unlock(&vmap_area_lock); @@ -3391,8 +3428,8 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, * and when pcpu_get_vm_areas() is success. */ while (area--) { - merge_or_add_vmap_area(vas[area], - &free_vmap_area_root, &free_vmap_area_list); + merge_or_add_vmap_area(vas[area], &free_vmap_area_root, + &free_vmap_area_list); vas[area] = NULL; }
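To close the loop on what the patch buys: shadow beyond the requested size is left as KASAN_VMALLOC_INVALID, so with CONFIG_KASAN_VMALLOC=y an out-of-bounds access on a vmalloc'ed buffer is reported as "vmalloc-out-of-bounds" (the bug type wired up in generic_report.c above). A minimal test sketch in the style of lib/test_kasan.c (the function below is illustrative, not part of the patch):

    #include <linux/vmalloc.h>

    static noinline void vmalloc_oob(void)
    {
            char *v = vmalloc(100);

            if (!v)
                    return;
            /*
             * 100 is not a multiple of the 8-byte shadow granule, so
             * v[100] lands in a partially poisoned granule and should
             * produce a "vmalloc-out-of-bounds" report.
             */
            v[100] = 'x';
            vfree(v);
    }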