Message ID | 20240822192543.3359552-7-Liam.Howlett@oracle.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Avoid MAP_FIXED gap exposure | expand |
Am Donnerstag, dem 22.08.2024 um 15:25 -0400 schrieb Liam R. Howlett: > From: "Liam R. Howlett" <Liam.Howlett@Oracle.com> > > Clean up the code by changing the munmap operation to use a structure > for the accounting and munmap variables. > > Since remove_mt() is only called in one location and the contents will > be reduced to almost nothing. The remains of the function can be added > to vms_complete_munmap_vmas(). > > Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com> > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > Reviewed-by: Suren Baghdasaryan <surenb@google.com> > --- > mm/vma.c | 83 +++++++++++++++++++++++++++++--------------------------- > mm/vma.h | 6 ++++ > 2 files changed, 49 insertions(+), 40 deletions(-) > > diff --git a/mm/vma.c b/mm/vma.c > index e1aee43a3dc4..58604fe3bd03 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -103,7 +103,8 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms, > vms->unlock = unlock; > vms->uf = uf; > vms->vma_count = 0; > - vms->nr_pages = vms->locked_vm = 0; > + vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0; > + vms->exec_vm = vms->stack_vm = vms->data_vm = 0; > } > > /* > @@ -299,30 +300,6 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, > return __split_vma(vmi, vma, addr, new_below); > } > > -/* > - * Ok - we have the memory areas we should free on a maple tree so release them, > - * and do the vma updates. > - * > - * Called with the mm semaphore held. 
> - */ > -static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) > -{ > - unsigned long nr_accounted = 0; > - struct vm_area_struct *vma; > - > - /* Update high watermark before we lower total_vm */ > - update_hiwater_vm(mm); > - mas_for_each(mas, vma, ULONG_MAX) { > - long nrpages = vma_pages(vma); > - > - if (vma->vm_flags & VM_ACCOUNT) > - nr_accounted += nrpages; > - vm_stat_account(mm, vma->vm_flags, -nrpages); > - remove_vma(vma, false); > - } > - vm_unacct_memory(nr_accounted); > -} > - > /* > * init_vma_prep() - Initializer wrapper for vma_prepare struct > * @vp: The vma_prepare struct > @@ -722,7 +699,7 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach) > static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > struct ma_state *mas_detach) > { > - struct vm_area_struct *prev, *next; > + struct vm_area_struct *vma; > struct mm_struct *mm; > > mm = vms->mm; > @@ -731,21 +708,31 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > if (vms->unlock) > mmap_write_downgrade(mm); > > - prev = vma_iter_prev_range(vms->vmi); > - next = vma_next(vms->vmi); > - if (next) > - vma_iter_prev_range(vms->vmi); > - > /* > * We can free page tables without write-locking mmap_lock because VMAs > * were isolated before we downgraded mmap_lock. 
> */ > mas_set(mas_detach, 1); > - unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, > - vms->vma_count, !vms->unlock); > - /* Statistics and freeing VMAs */ > + unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next, > + vms->start, vms->end, vms->vma_count, !vms->unlock); > + /* Update high watermark before we lower total_vm */ > + update_hiwater_vm(mm); > + /* Stat accounting */ > + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages); > + mm->exec_vm -= vms->exec_vm; > + mm->stack_vm -= vms->stack_vm; > + mm->data_vm -= vms->data_vm; > + /* Paranoid bookkeeping */ > + VM_WARN_ON(vms->exec_vm > mm->exec_vm); > + VM_WARN_ON(vms->stack_vm > mm->stack_vm); > + VM_WARN_ON(vms->data_vm > mm->data_vm); > + I'm running the v7 Patchset on linux-next-20240822 and I get lots of these errors (right on boot) (both when using the complete patchset and when using only the patches up to this): [ T620] WARNING: CPU: 6 PID: 620 at mm/vma.c:725 vms_complete_munmap_vmas+0x1d8/0x200 [ T620] Modules linked in: amd_atl ecc mc sparse_keymap wmi_bmof edac_mce_amd snd snd_pci_acp3x k10temp soundcore ccp battery ac button hid_sensor_gyro_3d hid_sensor_als hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc hid_sensor_iio_common joydev evdev serio_raw mt7921e mt7921_common mt792x_lib mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid amdgpu i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper amdxcp xhci_pci drm_buddy hid_sensor_hub xhci_hcd nvme mfd_core gpu_sched hid_multitouch hid_generic crc32c_intel psmouse usbcore i2c_piix4 drm_display_helper amd_sfh i2c_hid_acpi i2c_smbus usb_common crc16 nvme_core r8169 i2c_hid hid i2c_designware_platform i2c_designware_core [ T620] CPU: 6 UID: 0 PID: 620 Comm: fsck.vfat Not tainted 6.11.0-rc4-next- 
20240822-liamh-v7-00021-gc6686c81601f #322 [ T620] Hardware name: Micro-Star International Co., Ltd. Alpha 15 B5EEK/MS- 158L, BIOS E158LAMS.107 11/10/2021 [ T620] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 [ T620] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 00 83 c0 01 89 85 e0 00 00 00 e8 7d 39 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f [ T620] RSP: 0018:ffffa415c09d7d10 EFLAGS: 00010283 [ T620] RAX: 00000000000000cd RBX: ffffa415c09d7d90 RCX: 000000000000018e [ T620] RDX: 0000000000000021 RSI: 00000000000019d9 RDI: ffff9073ee7a6400 [ T620] RBP: ffff906541341f80 R08: 0000000000000000 R09: 000000000000080a [ T620] R10: 000000000001d4de R11: 0000000000000140 R12: ffffa415c09d7d48 [ T620] R13: 00007fbd5ea5f000 R14: 00007fbd5eb5efff R15: ffffa415c09d7d90 [ T620] FS: 00007fbd5ec38740(0000) GS:ffff9073ee780000(0000) knlGS:0000000000000000 [ T620] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ T620] CR2: 00007fc336339c90 CR3: 000000010a39e000 CR4: 0000000000750ef0 [ T620] PKRU: 55555554 [ T620] Call Trace: [ T620] <TASK> [ T620] ? __warn.cold+0x90/0x9e [ T620] ? vms_complete_munmap_vmas+0x1d8/0x200 [ T620] ? report_bug+0xfa/0x140 [ T620] ? handle_bug+0x53/0x90 [ T620] ? exc_invalid_op+0x17/0x70 [ T620] ? asm_exc_invalid_op+0x1a/0x20 [ T620] ? 
vms_complete_munmap_vmas+0x1d8/0x200 [ T620] do_vmi_align_munmap+0x1e0/0x260 [ T620] do_vmi_munmap+0xbe/0x160 [ T620] __vm_munmap+0x96/0x110 [ T620] __x64_sys_munmap+0x16/0x20 [ T620] do_syscall_64+0x5f/0x170 [ T620] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ T620] RIP: 0033:0x7fbd5ed3ec57 [ T620] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 [ T620] RSP: 002b:00007fff0b04d298 EFLAGS: 00000202 ORIG_RAX: 000000000000000b [ T620] RAX: ffffffffffffffda RBX: ffffffffffffff88 RCX: 00007fbd5ed3ec57 [ T620] RDX: 0000000000000000 RSI: 0000000000100000 RDI: 00007fbd5ea5f000 [ T620] RBP: 0000000000000002 R08: 0000000000100000 R09: 0000000000000007 [ T620] R10: 0000000000000007 R11: 0000000000000202 R12: 00007fff0b04d588 [ T620] R13: 000055b76c789fc6 R14: 00007fff0b04d360 R15: 00007fff0b04d3c0 [ T620] </TASK> [ T620] ---[ end trace 0000000000000000 ]--- Bert Karwatzki
On Fri, Aug 23, 2024 at 10:43:11AM GMT, Bert Karwatzki wrote: [snip] > > @@ -731,21 +708,31 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > > if (vms->unlock) > > mmap_write_downgrade(mm); > > > > - prev = vma_iter_prev_range(vms->vmi); > > - next = vma_next(vms->vmi); > > - if (next) > > - vma_iter_prev_range(vms->vmi); > > - > > /* > > * We can free page tables without write-locking mmap_lock because VMAs > > * were isolated before we downgraded mmap_lock. > > */ > > mas_set(mas_detach, 1); > > - unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, > > - vms->vma_count, !vms->unlock); > > - /* Statistics and freeing VMAs */ > > + unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next, > > + vms->start, vms->end, vms->vma_count, !vms->unlock); > > + /* Update high watermark before we lower total_vm */ > > + update_hiwater_vm(mm); > > + /* Stat accounting */ > > + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages); > > + mm->exec_vm -= vms->exec_vm; > > + mm->stack_vm -= vms->stack_vm; > > + mm->data_vm -= vms->data_vm; > > + /* Paranoid bookkeeping */ > > + VM_WARN_ON(vms->exec_vm > mm->exec_vm); > > + VM_WARN_ON(vms->stack_vm > mm->stack_vm); > > + VM_WARN_ON(vms->data_vm > mm->data_vm); > > + > > I'm running the v7 Patchset on linux-next-20240822 and I get lots of these > errors (right on boot) (both when using the complete patchset and when using > only the patches up to this): Hm curious, I'm running this in qemu with CONFIG_DEBUG_VM set and don't see this at least on next-20240823. Liam's series is based on the mseal series by Pedro, not sure if that wasn't in 22 somehow? Can you try with 23, from tip and: b4 shazam 20240822192543.3359552-1-Liam.Howlett@oracle.com To grab this series just to be sure? Because that'd definitely be very weird + concerning and something we hadn't seen before (I don't think?) for the mm->data_vm to be incorrect... 
> > [ T620] WARNING: CPU: 6 PID: 620 at mm/vma.c:725 > vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] Modules linked in: amd_atl ecc mc sparse_keymap wmi_bmof edac_mce_amd > snd snd_pci_acp3x k10temp soundcore ccp battery ac button hid_sensor_gyro_3d > hid_sensor_als hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d > hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc > hid_sensor_iio_common joydev evdev serio_raw mt7921e mt7921_common mt792x_lib > mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse > efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid > amdgpu i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper amdxcp > xhci_pci drm_buddy hid_sensor_hub xhci_hcd nvme mfd_core gpu_sched > hid_multitouch hid_generic crc32c_intel psmouse usbcore i2c_piix4 > drm_display_helper amd_sfh i2c_hid_acpi i2c_smbus usb_common crc16 nvme_core > r8169 i2c_hid hid i2c_designware_platform i2c_designware_core > [ T620] CPU: 6 UID: 0 PID: 620 Comm: fsck.vfat Not tainted 6.11.0-rc4-next- > 20240822-liamh-v7-00021-gc6686c81601f #322 > [ T620] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS- > 158L, BIOS E158LAMS.107 11/10/2021 > [ T620] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 > 00 83 c0 01 89 85 e0 00 00 00 e8 7d 39 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff > ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f > [ T620] RSP: 0018:ffffa415c09d7d10 EFLAGS: 00010283 > [ T620] RAX: 00000000000000cd RBX: ffffa415c09d7d90 RCX: 000000000000018e > [ T620] RDX: 0000000000000021 RSI: 00000000000019d9 RDI: ffff9073ee7a6400 > [ T620] RBP: ffff906541341f80 R08: 0000000000000000 R09: 000000000000080a > [ T620] R10: 000000000001d4de R11: 0000000000000140 R12: ffffa415c09d7d48 > [ T620] R13: 00007fbd5ea5f000 R14: 00007fbd5eb5efff R15: ffffa415c09d7d90 > [ T620] FS: 00007fbd5ec38740(0000) GS:ffff9073ee780000(0000) > knlGS:0000000000000000 > [ T620] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ T620] CR2: 00007fc336339c90 CR3: 000000010a39e000 CR4: 0000000000750ef0 > [ T620] PKRU: 55555554 > [ T620] Call Trace: > [ T620] <TASK> > [ T620] ? __warn.cold+0x90/0x9e > [ T620] ? vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] ? report_bug+0xfa/0x140 > [ T620] ? handle_bug+0x53/0x90 > [ T620] ? exc_invalid_op+0x17/0x70 > [ T620] ? asm_exc_invalid_op+0x1a/0x20 > [ T620] ? 
vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] do_vmi_align_munmap+0x1e0/0x260 > [ T620] do_vmi_munmap+0xbe/0x160 > [ T620] __vm_munmap+0x96/0x110 > [ T620] __x64_sys_munmap+0x16/0x20 > [ T620] do_syscall_64+0x5f/0x170 > [ T620] entry_SYSCALL_64_after_hwframe+0x55/0x5d > [ T620] RIP: 0033:0x7fbd5ed3ec57 > [ T620] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e > 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff > 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 > [ T620] RSP: 002b:00007fff0b04d298 EFLAGS: 00000202 ORIG_RAX: 000000000000000b > [ T620] RAX: ffffffffffffffda RBX: ffffffffffffff88 RCX: 00007fbd5ed3ec57 > [ T620] RDX: 0000000000000000 RSI: 0000000000100000 RDI: 00007fbd5ea5f000 > [ T620] RBP: 0000000000000002 R08: 0000000000100000 R09: 0000000000000007 > [ T620] R10: 0000000000000007 R11: 0000000000000202 R12: 00007fff0b04d588 > [ T620] R13: 000055b76c789fc6 R14: 00007fff0b04d360 R15: 00007fff0b04d3c0 > [ T620] </TASK> > [ T620] ---[ end trace 0000000000000000 ]--- > > > Bert Karwatzki
Am Freitag, dem 23.08.2024 um 10:55 +0100 schrieb Lorenzo Stoakes: > On Fri, Aug 23, 2024 at 10:43:11AM GMT, Bert Karwatzki wrote: > > [snip] > > > > @@ -731,21 +708,31 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > > > if (vms->unlock) > > > mmap_write_downgrade(mm); > > > > > > - prev = vma_iter_prev_range(vms->vmi); > > > - next = vma_next(vms->vmi); > > > - if (next) > > > - vma_iter_prev_range(vms->vmi); > > > - > > > /* > > > * We can free page tables without write-locking mmap_lock because VMAs > > > * were isolated before we downgraded mmap_lock. > > > */ > > > mas_set(mas_detach, 1); > > > - unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, > > > - vms->vma_count, !vms->unlock); > > > - /* Statistics and freeing VMAs */ > > > + unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next, > > > + vms->start, vms->end, vms->vma_count, !vms->unlock); > > > + /* Update high watermark before we lower total_vm */ > > > + update_hiwater_vm(mm); > > > + /* Stat accounting */ > > > + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages); > > > + mm->exec_vm -= vms->exec_vm; > > > + mm->stack_vm -= vms->stack_vm; > > > + mm->data_vm -= vms->data_vm; > > > + /* Paranoid bookkeeping */ > > > + VM_WARN_ON(vms->exec_vm > mm->exec_vm); > > > + VM_WARN_ON(vms->stack_vm > mm->stack_vm); > > > + VM_WARN_ON(vms->data_vm > mm->data_vm); > > > + > > > > I'm running the v7 Patchset on linux-next-20240822 and I get lots of these > > errors (right on boot) (both when using the complete patchset and when using > > only the patches up to this): > > Hm curious, I'm running this in qemu with CONFIG_DEBUG_VM set and don't see > this at lesat on next-20240823. > > Liam's series is based on the mseal series by Pedro, not sure if that wasn't in > 22 somehow? > > Can you try with 23, from tip and: > > b4 shazam 20240822192543.3359552-1-Liam.Howlett@oracle.com > > To grab this series just to be sure? 
> > Because that'd definitely be very weird + concerning and something we hadn't > seen before (I don't think?) for the mm->data_vm to be incorrect... > > > > > [ T620] WARNING: CPU: 6 PID: 620 at mm/vma.c:725 > > vms_complete_munmap_vmas+0x1d8/0x200 > > [ T620] Modules linked in: amd_atl ecc mc sparse_keymap wmi_bmof edac_mce_amd > > snd snd_pci_acp3x k10temp soundcore ccp battery ac button hid_sensor_gyro_3d > > hid_sensor_als hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d > > hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc > > hid_sensor_iio_common joydev evdev serio_raw mt7921e mt7921_common mt792x_lib > > mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse > > efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid > > amdgpu i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper amdxcp > > xhci_pci drm_buddy hid_sensor_hub xhci_hcd nvme mfd_core gpu_sched > > hid_multitouch hid_generic crc32c_intel psmouse usbcore i2c_piix4 > > drm_display_helper amd_sfh i2c_hid_acpi i2c_smbus usb_common crc16 nvme_core > > r8169 i2c_hid hid i2c_designware_platform i2c_designware_core > > [ T620] CPU: 6 UID: 0 PID: 620 Comm: fsck.vfat Not tainted 6.11.0-rc4-next- > > 20240822-liamh-v7-00021-gc6686c81601f #322 > > [ T620] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS- > > 158L, BIOS E158LAMS.107 11/10/2021 > > [ T620] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 > > [ T620] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 > > 00 83 c0 01 89 85 e0 00 00 00 e8 7d 39 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff > > ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f > > [ T620] RSP: 0018:ffffa415c09d7d10 EFLAGS: 00010283 > > [ T620] RAX: 00000000000000cd RBX: ffffa415c09d7d90 RCX: 000000000000018e > > [ T620] RDX: 0000000000000021 RSI: 00000000000019d9 RDI: ffff9073ee7a6400 > > [ T620] RBP: ffff906541341f80 R08: 0000000000000000 R09: 000000000000080a > > [ T620] R10: 000000000001d4de R11: 0000000000000140 R12: ffffa415c09d7d48 > > [ T620] R13: 00007fbd5ea5f000 R14: 00007fbd5eb5efff R15: ffffa415c09d7d90 > > [ T620] FS: 00007fbd5ec38740(0000) GS:ffff9073ee780000(0000) > > knlGS:0000000000000000 > > [ T620] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > [ T620] CR2: 00007fc336339c90 CR3: 000000010a39e000 CR4: 0000000000750ef0 > > [ T620] PKRU: 55555554 > > [ T620] Call Trace: > > [ T620] <TASK> > > [ T620] ? __warn.cold+0x90/0x9e > > [ T620] ? vms_complete_munmap_vmas+0x1d8/0x200 > > [ T620] ? report_bug+0xfa/0x140 > > [ T620] ? handle_bug+0x53/0x90 > > [ T620] ? exc_invalid_op+0x17/0x70 > > [ T620] ? asm_exc_invalid_op+0x1a/0x20 > > [ T620] ? 
vms_complete_munmap_vmas+0x1d8/0x200 > > [ T620] do_vmi_align_munmap+0x1e0/0x260 > > [ T620] do_vmi_munmap+0xbe/0x160 > > [ T620] __vm_munmap+0x96/0x110 > > [ T620] __x64_sys_munmap+0x16/0x20 > > [ T620] do_syscall_64+0x5f/0x170 > > [ T620] entry_SYSCALL_64_after_hwframe+0x55/0x5d > > [ T620] RIP: 0033:0x7fbd5ed3ec57 > > [ T620] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e > > 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff > > 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 > > [ T620] RSP: 002b:00007fff0b04d298 EFLAGS: 00000202 ORIG_RAX: 000000000000000b > > [ T620] RAX: ffffffffffffffda RBX: ffffffffffffff88 RCX: 00007fbd5ed3ec57 > > [ T620] RDX: 0000000000000000 RSI: 0000000000100000 RDI: 00007fbd5ea5f000 > > [ T620] RBP: 0000000000000002 R08: 0000000000100000 R09: 0000000000000007 > > [ T620] R10: 0000000000000007 R11: 0000000000000202 R12: 00007fff0b04d588 > > [ T620] R13: 000055b76c789fc6 R14: 00007fff0b04d360 R15: 00007fff0b04d3c0 > > [ T620] </TASK> > > [ T620] ---[ end trace 0000000000000000 ]--- > > > > > > Bert Karwatzki I grabbed the patches by saving the v7 patch emails as an mbox file and using git am to apply them (which worked without error) and git pull --rebase to update the series to next-20240823 (which works without conflicts). 
$ git log HEAD~22..HEAD --oneline a060ce2752a8 (HEAD -> liamh_mmap_v7) mm/vma.h: Optimise vma_munmap_struct 62fdaa7f747c mm/vma: Drop incorrect comment from vms_gather_munmap_vmas() 8606e70278c5 mm: Move may_expand_vm() check in mmap_region() fada0fd73e66 ipc/shm, mm: Drop do_vma_munmap() bc57e24e2564 mm/mmap: Use vms accounted pages in mmap_region() 26f203f001eb mm/mmap: Use PHYS_PFN in mmap_region() efe56a49d0ef mm: Change failure of MAP_FIXED to restoring the gap on failure 494d21bcde64 mm/mmap: Avoid zeroing vma tree in mmap_region() ff688d8cec39 mm: Clean up unmap_region() argument list 862b919b20a4 mm/vma: Track start and end for munmap in vma_munmap_struct f406d75d8787 mm/mmap: Reposition vma iterator in mmap_region() 6548fe69d672 mm/vma: Support vma == NULL in init_vma_munmap() 2ff31a2341d2 mm/vma: Expand mmap_region() munmap call 7806ca6562c5 mm/vma: Inline munmap operation in mmap_region() b9659761b35e mm/vma: Extract validate_mm() from vma_complete() 48fde0bebb75 mm/vma: Change munmap to use vma_munmap_struct() for accounting and surrounding vmas 7bb7a27044f0 mm/vma: Introduce vma_munmap_struct for use in munmap operations 3b4885e2e6b2 mm/vma: Extract the gathering of vmas from do_vmi_align_munmap() 427cdb242d36 mm/vma: Introduce vmi_complete_munmap_vmas() 5035f0d0c68b mm/vma: Introduce abort_munmap_vmas() 717dcbdf7521 mm/vma: Correctly position vma_iterator in __split_vma() c79c85875f1a (tag: next-20240823, origin/master, origin/HEAD) Add linux-next specific files for 20240823 Here's a short extract from dmesg (the buffer has already overrun) [ 206.641849] [ T3201] ------------[ cut here ]------------ [ 206.641852] [ T3201] WARNING: CPU: 7 PID: 3201 at mm/vma.c:725 vms_complete_munmap_vmas+0x1d8/0x200 [ 206.641859] [ T3201] Modules linked in: ccm snd_seq_dummy snd_hrtimer snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device rfcomm cpufreq_userspace cpufreq_powersave cpufreq_conservative bnep nls_ascii nls_cp437 vfat fat snd_ctl_led btusb 
btrtl snd_hda_codec_realtek btintel btbcm snd_hda_codec_generic btmtk snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg bluetooth amd_atl uvcvideo snd_hda_codec videobuf2_vmalloc snd_acp3x_pdm_dma snd_soc_dmic snd_acp3x_rn uvc snd_hwdep videobuf2_memops snd_soc_core snd_hda_core videobuf2_v4l2 snd_pcm_oss snd_mixer_oss snd_rn_pci_acp3x videodev snd_acp_config videobuf2_common snd_soc_acpi snd_pcm msi_wmi ecdh_generic ecc mc edac_mce_amd sparse_keymap wmi_bmof snd_timer snd_pci_acp3x snd k10temp soundcore ccp ac battery button hid_sensor_gyro_3d hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d hid_sensor_als hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc hid_sensor_iio_common joydev evdev serio_raw mt7921e [ 206.641927] [ T3201] mt7921_common mt792x_lib mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid amdgpu i2c_algo_bit drm_ttm_helper xhci_pci ttm drm_exec drm_suballoc_helper xhci_hcd amdxcp drm_buddy hid_sensor_hub usbcore i2c_piix4 nvme mfd_core gpu_sched hid_multitouch hid_generic crc32c_intel psmouse i2c_hid_acpi i2c_smbus usb_common amd_sfh drm_display_helper nvme_core i2c_hid crc16 r8169 hid i2c_designware_platform i2c_designware_core [ 206.641971] [ T3201] CPU: 7 UID: 0 PID: 3201 Comm: apt-get Tainted: G W 6.11.0-rc4-next-20240823-liamh-v7-00021-ga060ce2752a8 #325 [ 206.641974] [ T3201] Tainted: [W]=WARN [ 206.641976] [ T3201] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS-158L, BIOS E158LAMS.107 11/10/2021 [ 206.641977] [ T3201] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 [ 206.641980] [ T3201] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 00 83 c0 01 89 85 e0 00 00 00 e8 3d 43 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f [ 206.641982] [ T3201] RSP: 0018:ffffb05784eb7d10 EFLAGS: 00010287 [ 206.641984] [ T3201] RAX: 000000000000015d RBX: ffffb05784eb7d90 RCX: 000000000000087b [ 206.641986] [ T3201] RDX: 0000000000000021 RSI: 00000000000007e2 RDI: ffff9f56ae7e63c0 [ 206.641987] [ T3201] RBP: ffff9f48030a0540 R08: 0000000000000000 R09: 000000000000070d [ 206.641988] [ T3201] R10: 000000000001d4de R11: 0000000000000048 R12: ffffb05784eb7d48 [ 206.641990] [ T3201] R13: 00007f2017000000 R14: 00007f201dbc9fff R15: ffffb05784eb7d90 [ 206.641991] [ T3201] FS: 00007f201e88d880(0000) GS:ffff9f56ae7c0000(0000) knlGS:0000000000000000 [ 206.641993] [ T3201] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 206.641994] [ T3201] CR2: 000055cfbbc10000 CR3: 00000001813b8000 CR4: 0000000000750ef0 [ 206.641995] [ T3201] PKRU: 55555554 [ 206.641996] [ T3201] Call Trace: [ 206.641998] [ T3201] <TASK> [ 206.642001] [ T3201] ? __warn.cold+0x90/0x9e [ 206.642004] [ T3201] ? vms_complete_munmap_vmas+0x1d8/0x200 [ 206.642007] [ T3201] ? report_bug+0xfa/0x140 [ 206.642010] [ T3201] ? handle_bug+0x53/0x90 [ 206.642012] [ T3201] ? exc_invalid_op+0x17/0x70 [ 206.642014] [ T3201] ? asm_exc_invalid_op+0x1a/0x20 [ 206.642018] [ T3201] ? 
vms_complete_munmap_vmas+0x1d8/0x200 [ 206.642021] [ T3201] do_vmi_align_munmap+0x1e0/0x260 [ 206.642025] [ T3201] do_vmi_munmap+0xbe/0x160 [ 206.642028] [ T3201] __vm_munmap+0x96/0x110 [ 206.642032] [ T3201] __x64_sys_munmap+0x16/0x20 [ 206.642034] [ T3201] do_syscall_64+0x5f/0x170 [ 206.642037] [ T3201] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 206.642040] [ T3201] RIP: 0033:0x7f201e519c57 [ 206.642042] [ T3201] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 [ 206.642044] [ T3201] RSP: 002b:00007ffd2e02c5f8 EFLAGS: 00000246 ORIG_RAX: 000000000000000b [ 206.642046] [ T3201] RAX: ffffffffffffffda RBX: 000055cfbbc554f0 RCX: 00007f201e519c57 [ 206.642047] [ T3201] RDX: 0000000000000004 RSI: 0000000006bc97c1 RDI: 00007f2017000000 [ 206.642048] [ T3201] RBP: 0000000000000000 R08: 0000000000000005 R09: 0000000000000004 [ 206.642049] [ T3201] R10: 0000000000000007 R11: 0000000000000246 R12: 000055cfbbc54160 [ 206.642050] [ T3201] R13: 000055cfbbbeb198 R14: 000055cfbbc554f0 R15: 00007ffd2e02c6c0 [ 206.642053] [ T3201] </TASK> [ 206.642054] [ T3201] ---[ end trace 0000000000000000 ]--- [ 206.659454] [ T3201] ------------[ cut here ]------------ [ 206.659458] [ T3201] WARNING: CPU: 7 PID: 3201 at mm/vma.c:725 vms_complete_munmap_vmas+0x1d8/0x200 [ 206.659465] [ T3201] Modules linked in: ccm snd_seq_dummy snd_hrtimer snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device rfcomm cpufreq_userspace cpufreq_powersave cpufreq_conservative bnep nls_ascii nls_cp437 vfat fat snd_ctl_led btusb btrtl snd_hda_codec_realtek btintel btbcm snd_hda_codec_generic btmtk snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg bluetooth amd_atl uvcvideo snd_hda_codec videobuf2_vmalloc snd_acp3x_pdm_dma snd_soc_dmic snd_acp3x_rn uvc snd_hwdep videobuf2_memops snd_soc_core snd_hda_core videobuf2_v4l2 snd_pcm_oss 
snd_mixer_oss snd_rn_pci_acp3x videodev snd_acp_config videobuf2_common snd_soc_acpi snd_pcm msi_wmi ecdh_generic ecc mc edac_mce_amd sparse_keymap wmi_bmof snd_timer snd_pci_acp3x snd k10temp soundcore ccp ac battery button hid_sensor_gyro_3d hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d hid_sensor_als hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc hid_sensor_iio_common joydev evdev serio_raw mt7921e [ 206.659530] [ T3201] mt7921_common mt792x_lib mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid amdgpu i2c_algo_bit drm_ttm_helper xhci_pci ttm drm_exec drm_suballoc_helper xhci_hcd amdxcp drm_buddy hid_sensor_hub usbcore i2c_piix4 nvme mfd_core gpu_sched hid_multitouch hid_generic crc32c_intel psmouse i2c_hid_acpi i2c_smbus usb_common amd_sfh drm_display_helper nvme_core i2c_hid crc16 r8169 hid i2c_designware_platform i2c_designware_core [ 206.659575] [ T3201] CPU: 7 UID: 0 PID: 3201 Comm: apt-get Tainted: G W 6.11.0-rc4-next-20240823-liamh-v7-00021-ga060ce2752a8 #325 [ 206.659578] [ T3201] Tainted: [W]=WARN [ 206.659580] [ T3201] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS-158L, BIOS E158LAMS.107 11/10/2021 [ 206.659581] [ T3201] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 [ 206.659584] [ T3201] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 00 83 c0 01 89 85 e0 00 00 00 e8 3d 43 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f [ 206.659586] [ T3201] RSP: 0018:ffffb05784eb7d10 EFLAGS: 00010283 [ 206.659588] [ T3201] RAX: 000000000000015d RBX: ffffb05784eb7d90 RCX: 000000000000087b [ 206.659589] [ T3201] RDX: 0000000000000021 RSI: 0000000000000821 RDI: ffff9f56ae7e63c0 [ 206.659591] [ T3201] RBP: ffff9f48030a0540 R08: 0000000000000000 R09: 00000000000006f2 [ 206.659592] [ T3201] R10: 000000000001d4de R11: 0000000000000048 R12: ffffb05784eb7d48 [ 206.659593] [ T3201] R13: 00007f2017000000 R14: 00007f201dbaffff R15: ffffb05784eb7d90 [ 206.659594] [ T3201] FS: 00007f201e88d880(0000) GS:ffff9f56ae7c0000(0000) knlGS:0000000000000000 [ 206.659596] [ T3201] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 206.659597] [ T3201] CR2: 000055cfbbc28288 CR3: 00000001813b8000 CR4: 0000000000750ef0 [ 206.659599] [ T3201] PKRU: 55555554 [ 206.659600] [ T3201] Call Trace: [ 206.659602] [ T3201] <TASK> [ 206.659604] [ T3201] ? __warn.cold+0x90/0x9e [ 206.659607] [ T3201] ? vms_complete_munmap_vmas+0x1d8/0x200 [ 206.659610] [ T3201] ? report_bug+0xfa/0x140 [ 206.659613] [ T3201] ? handle_bug+0x53/0x90 [ 206.659615] [ T3201] ? exc_invalid_op+0x17/0x70 [ 206.659618] [ T3201] ? asm_exc_invalid_op+0x1a/0x20 [ 206.659621] [ T3201] ? 
vms_complete_munmap_vmas+0x1d8/0x200 [ 206.659624] [ T3201] do_vmi_align_munmap+0x1e0/0x260 [ 206.659628] [ T3201] do_vmi_munmap+0xbe/0x160 [ 206.659631] [ T3201] __vm_munmap+0x96/0x110 [ 206.659635] [ T3201] __x64_sys_munmap+0x16/0x20 [ 206.659637] [ T3201] do_syscall_64+0x5f/0x170 [ 206.659640] [ T3201] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 206.659642] [ T3201] RIP: 0033:0x7f201e519c57 [ 206.659644] [ T3201] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 [ 206.659646] [ T3201] RSP: 002b:00007ffd2e02c5f8 EFLAGS: 00000246 ORIG_RAX: 000000000000000b [ 206.659648] [ T3201] RAX: ffffffffffffffda RBX: 000055cfbbbff660 RCX: 00007f201e519c57 [ 206.659649] [ T3201] RDX: 0000000000000004 RSI: 0000000006baf09c RDI: 00007f2017000000 [ 206.659650] [ T3201] RBP: 0000000000000001 R08: 0000000000000007 R09: 0000000000000006 [ 206.659651] [ T3201] R10: 0000000000000007 R11: 0000000000000246 R12: 000055cfbbc60e30 [ 206.659652] [ T3201] R13: 0000000000000044 R14: 000055cfbbbff660 R15: 00007ffd2e02c6c0 [ 206.659655] [ T3201] </TASK> [ 206.659656] [ T3201] ---[ end trace 0000000000000000 ]--- [ 212.679951] [ T3222] ------------[ cut here ]------------ [ 212.679955] [ T3222] WARNING: CPU: 11 PID: 3222 at mm/vma.c:725 vms_complete_munmap_vmas+0x1d8/0x200 [ 212.679963] [ T3222] Modules linked in: ccm snd_seq_dummy snd_hrtimer snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device rfcomm cpufreq_userspace cpufreq_powersave cpufreq_conservative bnep nls_ascii nls_cp437 vfat fat snd_ctl_led btusb btrtl snd_hda_codec_realtek btintel btbcm snd_hda_codec_generic btmtk snd_hda_scodec_component snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg bluetooth amd_atl uvcvideo snd_hda_codec videobuf2_vmalloc snd_acp3x_pdm_dma snd_soc_dmic snd_acp3x_rn uvc snd_hwdep videobuf2_memops snd_soc_core snd_hda_core videobuf2_v4l2 snd_pcm_oss 
snd_mixer_oss snd_rn_pci_acp3x videodev snd_acp_config videobuf2_common snd_soc_acpi snd_pcm msi_wmi ecdh_generic ecc mc edac_mce_amd sparse_keymap wmi_bmof snd_timer snd_pci_acp3x snd k10temp soundcore ccp ac battery button hid_sensor_gyro_3d hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d hid_sensor_als hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc hid_sensor_iio_common joydev evdev serio_raw mt7921e [ 212.680030] [ T3222] mt7921_common mt792x_lib mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid amdgpu i2c_algo_bit drm_ttm_helper xhci_pci ttm drm_exec drm_suballoc_helper xhci_hcd amdxcp drm_buddy hid_sensor_hub usbcore i2c_piix4 nvme mfd_core gpu_sched hid_multitouch hid_generic crc32c_intel psmouse i2c_hid_acpi i2c_smbus usb_common amd_sfh drm_display_helper nvme_core i2c_hid crc16 r8169 hid i2c_designware_platform i2c_designware_core [ 212.680071] [ T3222] CPU: 11 UID: 0 PID: 3222 Comm: apt-extracttemp Tainted: G W 6.11.0-rc4-next-20240823-liamh-v7-00021- ga060ce2752a8 #325 [ 212.680074] [ T3222] Tainted: [W]=WARN [ 212.680076] [ T3222] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS-158L, BIOS E158LAMS.107 11/10/2021 [ 212.680077] [ T3222] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 [ 212.680080] [ T3222] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 00 83 c0 01 89 85 e0 00 00 00 e8 3d 43 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f [ 212.680082] [ T3222] RSP: 0018:ffffb05785b7fd10 EFLAGS: 00010283 [ 212.680084] [ T3222] RAX: 000000000000093a RBX: ffffb05785b7fd90 RCX: 0000000000000877 [ 212.680085] [ T3222] RDX: 0000000000000021 RSI: 0000000000000ad6 RDI: ffff9f56ae8e63c0 [ 212.680086] [ T3222] RBP: ffff9f4889d178c0 R08: 0000000000000000 R09: 00000000000006a4 [ 212.680088] [ T3222] R10: 000000000001d4de R11: 0000000000000048 R12: ffffb05785b7fd48 [ 212.680089] [ T3222] R13: 00007f1474400000 R14: 00007f147afc9fff R15: ffffb05785b7fd90 [ 212.680090] [ T3222] FS: 00007f147bc6e880(0000) GS:ffff9f56ae8c0000(0000) knlGS:0000000000000000 [ 212.680092] [ T3222] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 212.680093] [ T3222] CR2: 0000564415171000 CR3: 0000000223e48000 CR4: 0000000000750ef0 [ 212.680094] [ T3222] PKRU: 55555554 [ 212.680095] [ T3222] Call Trace: [ 212.680097] [ T3222] <TASK> [ 212.680100] [ T3222] ? __warn.cold+0x90/0x9e [ 212.680103] [ T3222] ? vms_complete_munmap_vmas+0x1d8/0x200 [ 212.680106] [ T3222] ? report_bug+0xfa/0x140 [ 212.680109] [ T3222] ? handle_bug+0x53/0x90 [ 212.680111] [ T3222] ? exc_invalid_op+0x17/0x70 [ 212.680113] [ T3222] ? asm_exc_invalid_op+0x1a/0x20 [ 212.680117] [ T3222] ? 
vms_complete_munmap_vmas+0x1d8/0x200 [ 212.680119] [ T3222] do_vmi_align_munmap+0x1e0/0x260 [ 212.680124] [ T3222] do_vmi_munmap+0xbe/0x160 [ 212.680126] [ T3222] __vm_munmap+0x96/0x110 [ 212.680130] [ T3222] __x64_sys_munmap+0x16/0x20 [ 212.680132] [ T3222] do_syscall_64+0x5f/0x170 [ 212.680135] [ T3222] entry_SYSCALL_64_after_hwframe+0x55/0x5d [ 212.680137] [ T3222] RIP: 0033:0x7f147b919c57 [ 212.680139] [ T3222] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 [ 212.680140] [ T3222] RSP: 002b:00007ffc1a6d5ae8 EFLAGS: 00000246 ORIG_RAX: 000000000000000b [ 212.680142] [ T3222] RAX: ffffffffffffffda RBX: 00005644150b34a0 RCX: 00007f147b919c57 [ 212.680144] [ T3222] RDX: 0000000000000004 RSI: 0000000006bc983b RDI: 00007f1474400000 [ 212.680145] [ T3222] RBP: 00007ffc1a6d5d90 R08: 0000000564415156 R09: 0000000000000007 [ 212.680146] [ T3222] R10: 0000000000000007 R11: 0000000000000246 R12: 00007ffc1a6d5c40 [ 212.680147] [ T3222] R13: 0000000000000011 R14: 0000000000000010 R15: 00007ffc1a6d5bc0 [ 212.680150] [ T3222] </TASK> [ 212.680150] [ T3222] ---[ end trace 0000000000000000 ]--- These messages aside everything seems to work (I sending this email using the affected kernel) so I'm wondering if the checks aren't a little too paranoid. By the way: These 6 patches by Pedro Falcato are present in linux-next-20240822, too: mm: remove can_modify_mm() Pedro Falcato mseal: replace can_modify_mm_madv with a vma variant Pedro Falcato mm/mremap: replace can_modify_mm with can_modify_vma Pedro Falcato mm/mprotect: replace can_modify_mm with can_modify_vma Pedro Falcato mm/munmap: replace can_modify_mm with can_modify_vma Pedro Falcato mm: move can_modify_vma to mm/vma.h Pedro Falcato Bert Karwatzki
On Fri, Aug 23, 2024 at 10:43:11AM GMT, Bert Karwatzki wrote: [snip] > > @@ -731,21 +708,31 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, > > if (vms->unlock) > > mmap_write_downgrade(mm); > > > > - prev = vma_iter_prev_range(vms->vmi); > > - next = vma_next(vms->vmi); > > - if (next) > > - vma_iter_prev_range(vms->vmi); > > - > > /* > > * We can free page tables without write-locking mmap_lock because VMAs > > * were isolated before we downgraded mmap_lock. > > */ > > mas_set(mas_detach, 1); > > - unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, > > - vms->vma_count, !vms->unlock); > > - /* Statistics and freeing VMAs */ > > + unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next, > > + vms->start, vms->end, vms->vma_count, !vms->unlock); > > + /* Update high watermark before we lower total_vm */ > > + update_hiwater_vm(mm); > > + /* Stat accounting */ > > + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages); > > + mm->exec_vm -= vms->exec_vm; > > + mm->stack_vm -= vms->stack_vm; > > + mm->data_vm -= vms->data_vm; > > + /* Paranoid bookkeeping */ > > + VM_WARN_ON(vms->exec_vm > mm->exec_vm); > > + VM_WARN_ON(vms->stack_vm > mm->stack_vm); > > + VM_WARN_ON(vms->data_vm > mm->data_vm); Hang on... I didn't read this closely enough (clearly!) we're doing these checks _after_ we decrement the counters, which is... not correct :) Your processes must be reducing their data_vm value to something less than what was reduced during the munmap operation. Liam - I suggest we put these checks before we decrement. 
> > + > > I'm running the v7 Patchset on linux-next-20240822 and I get lots of these > errors (right on boot) (both when using the complete patchset and when using > only the patches up to this): > > [ T620] WARNING: CPU: 6 PID: 620 at mm/vma.c:725 > vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] Modules linked in: amd_atl ecc mc sparse_keymap wmi_bmof edac_mce_amd > snd snd_pci_acp3x k10temp soundcore ccp battery ac button hid_sensor_gyro_3d > hid_sensor_als hid_sensor_magn_3d hid_sensor_prox hid_sensor_accel_3d > hid_sensor_trigger industrialio_triggered_buffer kfifo_buf industrialio amd_pmc > hid_sensor_iio_common joydev evdev serio_raw mt7921e mt7921_common mt792x_lib > mt76_connac_lib mt76 mac80211 libarc4 cfg80211 rfkill msr nvme_fabrics fuse > efi_pstore configfs efivarfs autofs4 ext4 crc32c_generic mbcache jbd2 usbhid > amdgpu i2c_algo_bit drm_ttm_helper ttm drm_exec drm_suballoc_helper amdxcp > xhci_pci drm_buddy hid_sensor_hub xhci_hcd nvme mfd_core gpu_sched > hid_multitouch hid_generic crc32c_intel psmouse usbcore i2c_piix4 > drm_display_helper amd_sfh i2c_hid_acpi i2c_smbus usb_common crc16 nvme_core > r8169 i2c_hid hid i2c_designware_platform i2c_designware_core > [ T620] CPU: 6 UID: 0 PID: 620 Comm: fsck.vfat Not tainted 6.11.0-rc4-next- > 20240822-liamh-v7-00021-gc6686c81601f #322 > [ T620] Hardware name: Micro-Star International Co., Ltd. 
Alpha 15 B5EEK/MS- > 158L, BIOS E158LAMS.107 11/10/2021 > [ T620] RIP: 0010:vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] Code: 8b 85 a8 00 00 00 a8 01 74 35 8b 85 e0 00 00 00 48 8d bd a8 00 00 > 00 83 c0 01 89 85 e0 00 00 00 e8 7d 39 e8 ff e9 63 fe ff ff <0f> 0b e9 eb fe ff > ff 0f 0b e9 d0 fe ff ff 0f 0b e9 d3 fe ff ff 0f > [ T620] RSP: 0018:ffffa415c09d7d10 EFLAGS: 00010283 > [ T620] RAX: 00000000000000cd RBX: ffffa415c09d7d90 RCX: 000000000000018e > [ T620] RDX: 0000000000000021 RSI: 00000000000019d9 RDI: ffff9073ee7a6400 > [ T620] RBP: ffff906541341f80 R08: 0000000000000000 R09: 000000000000080a > [ T620] R10: 000000000001d4de R11: 0000000000000140 R12: ffffa415c09d7d48 > [ T620] R13: 00007fbd5ea5f000 R14: 00007fbd5eb5efff R15: ffffa415c09d7d90 > [ T620] FS: 00007fbd5ec38740(0000) GS:ffff9073ee780000(0000) > knlGS:0000000000000000 > [ T620] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ T620] CR2: 00007fc336339c90 CR3: 000000010a39e000 CR4: 0000000000750ef0 > [ T620] PKRU: 55555554 > [ T620] Call Trace: > [ T620] <TASK> > [ T620] ? __warn.cold+0x90/0x9e > [ T620] ? vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] ? report_bug+0xfa/0x140 > [ T620] ? handle_bug+0x53/0x90 > [ T620] ? exc_invalid_op+0x17/0x70 > [ T620] ? asm_exc_invalid_op+0x1a/0x20 > [ T620] ? 
vms_complete_munmap_vmas+0x1d8/0x200 > [ T620] do_vmi_align_munmap+0x1e0/0x260 > [ T620] do_vmi_munmap+0xbe/0x160 > [ T620] __vm_munmap+0x96/0x110 > [ T620] __x64_sys_munmap+0x16/0x20 > [ T620] do_syscall_64+0x5f/0x170 > [ T620] entry_SYSCALL_64_after_hwframe+0x55/0x5d > [ T620] RIP: 0033:0x7fbd5ed3ec57 > [ T620] Code: 73 01 c3 48 8b 0d c1 71 0d 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e > 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 b8 0b 00 00 00 0f 05 <48> 3d 01 f0 ff ff > 73 01 c3 48 8b 0d 91 71 0d 00 f7 d8 64 89 01 48 > [ T620] RSP: 002b:00007fff0b04d298 EFLAGS: 00000202 ORIG_RAX: 000000000000000b > [ T620] RAX: ffffffffffffffda RBX: ffffffffffffff88 RCX: 00007fbd5ed3ec57 > [ T620] RDX: 0000000000000000 RSI: 0000000000100000 RDI: 00007fbd5ea5f000 > [ T620] RBP: 0000000000000002 R08: 0000000000100000 R09: 0000000000000007 > [ T620] R10: 0000000000000007 R11: 0000000000000202 R12: 00007fff0b04d588 > [ T620] R13: 000055b76c789fc6 R14: 00007fff0b04d360 R15: 00007fff0b04d3c0 > [ T620] </TASK> > [ T620] ---[ end trace 0000000000000000 ]--- > > > Bert Karwatzki
On Fri, Aug 23, 2024 at 12:42:18PM GMT, Bert Karwatzki wrote: > Am Freitag, dem 23.08.2024 um 10:55 +0100 schrieb Lorenzo Stoakes: [snip] > > On Fri, Aug 23, 2024 at 10:43:11AM GMT, Bert Karwatzki wrote: > > > > [snip] > > > I'm running the v7 Patchset on linux-next-20240822 and I get lots of these > > > errors (right on boot) (both when using the complete patchset and when using > > > only the patches up to this): > > > > Hm curious, I'm running this in qemu with CONFIG_DEBUG_VM set and don't see > > this at least on next-20240823. > > > > Liam's series is based on the mseal series by Pedro, not sure if that wasn't in > > 22 somehow? > > > > Can you try with 23, from tip and: > > > > b4 shazam 20240822192543.3359552-1-Liam.Howlett@oracle.com > > > > To grab this series just to be sure? > > > > Because that'd definitely be very weird + concerning and something we hadn't > > seen before (I don't think?) for the mm->data_vm to be incorrect... > > > > > [snip] > > I grabbed the patches by saving the v7 patch emails as an mbox file and using > git am to apply them (which worked without error) and git pull --rebase to > update the series to next-20240823 (which works without conflicts). Thanks, you are right, see other thread for an explanation. Good spot! [snip - for brevity cutting dmesg logs, but much appreciated!] > These messages aside everything seems to work (I am sending this email using the > affected kernel) so I'm wondering if the checks aren't a little too paranoid. Just to make the point - if these checks were the correct way around, this would indicate that key mm counters are underflowing, which would be very serious indeed - so just paranoid enough :) The paranoia is because this should never happen. 
> > By the way: These 6 patches by Pedro Falcato are present in linux-next-20240822, > too: > > mm: remove can_modify_mm() Pedro Falcato > mseal: replace can_modify_mm_madv with a vma variant Pedro Falcato > mm/mremap: replace can_modify_mm with can_modify_vma Pedro Falcato > mm/mprotect: replace can_modify_mm with can_modify_vma Pedro Falcato > mm/munmap: replace can_modify_mm with can_modify_vma Pedro Falcato > mm: move can_modify_vma to mm/vma.h Pedro Falcato > > Bert Karwatzki > Thanks for confirming!
diff --git a/mm/vma.c b/mm/vma.c index e1aee43a3dc4..58604fe3bd03 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -103,7 +103,8 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms, vms->unlock = unlock; vms->uf = uf; vms->vma_count = 0; - vms->nr_pages = vms->locked_vm = 0; + vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0; + vms->exec_vm = vms->stack_vm = vms->data_vm = 0; } /* @@ -299,30 +300,6 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return __split_vma(vmi, vma, addr, new_below); } -/* - * Ok - we have the memory areas we should free on a maple tree so release them, - * and do the vma updates. - * - * Called with the mm semaphore held. - */ -static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) -{ - unsigned long nr_accounted = 0; - struct vm_area_struct *vma; - - /* Update high watermark before we lower total_vm */ - update_hiwater_vm(mm); - mas_for_each(mas, vma, ULONG_MAX) { - long nrpages = vma_pages(vma); - - if (vma->vm_flags & VM_ACCOUNT) - nr_accounted += nrpages; - vm_stat_account(mm, vma->vm_flags, -nrpages); - remove_vma(vma, false); - } - vm_unacct_memory(nr_accounted); -} - /* * init_vma_prep() - Initializer wrapper for vma_prepare struct * @vp: The vma_prepare struct @@ -722,7 +699,7 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach) static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, struct ma_state *mas_detach) { - struct vm_area_struct *prev, *next; + struct vm_area_struct *vma; struct mm_struct *mm; mm = vms->mm; @@ -731,21 +708,31 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, if (vms->unlock) mmap_write_downgrade(mm); - prev = vma_iter_prev_range(vms->vmi); - next = vma_next(vms->vmi); - if (next) - vma_iter_prev_range(vms->vmi); - /* * We can free page tables without write-locking mmap_lock because VMAs * were isolated before we downgraded mmap_lock. 
*/ mas_set(mas_detach, 1); - unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, - vms->vma_count, !vms->unlock); - /* Statistics and freeing VMAs */ + unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next, + vms->start, vms->end, vms->vma_count, !vms->unlock); + /* Update high watermark before we lower total_vm */ + update_hiwater_vm(mm); + /* Stat accounting */ + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages); + mm->exec_vm -= vms->exec_vm; + mm->stack_vm -= vms->stack_vm; + mm->data_vm -= vms->data_vm; + /* Paranoid bookkeeping */ + VM_WARN_ON(vms->exec_vm > mm->exec_vm); + VM_WARN_ON(vms->stack_vm > mm->stack_vm); + VM_WARN_ON(vms->data_vm > mm->data_vm); + + /* Remove and clean up vmas */ mas_set(mas_detach, 0); - remove_mt(mm, mas_detach); + mas_for_each(mas_detach, vma, ULONG_MAX) + remove_vma(vma, false); + + vm_unacct_memory(vms->nr_accounted); validate_mm(mm); if (vms->unlock) mmap_read_unlock(mm); @@ -799,18 +786,19 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, if (error) goto start_split_failed; } + vms->prev = vma_prev(vms->vmi); /* * Detach a range of VMAs from the mm. Using next as a temp variable as * it is always overwritten. */ - next = vms->vma; - do { + for_each_vma_range(*(vms->vmi), next, vms->end) { + long nrpages; + if (!can_modify_vma(next)) { error = -EPERM; goto modify_vma_failed; } - /* Does it split the end? 
*/ if (next->vm_end > vms->end) { error = __split_vma(vms->vmi, next, vms->end, 0); @@ -824,8 +812,21 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, goto munmap_gather_failed; vma_mark_detached(next, true); + nrpages = vma_pages(next); + + vms->nr_pages += nrpages; if (next->vm_flags & VM_LOCKED) - vms->locked_vm += vma_pages(next); + vms->locked_vm += nrpages; + + if (next->vm_flags & VM_ACCOUNT) + vms->nr_accounted += nrpages; + + if (is_exec_mapping(next->vm_flags)) + vms->exec_vm += nrpages; + else if (is_stack_mapping(next->vm_flags)) + vms->stack_vm += nrpages; + else if (is_data_mapping(next->vm_flags)) + vms->data_vm += nrpages; if (unlikely(vms->uf)) { /* @@ -847,7 +848,9 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, BUG_ON(next->vm_start < vms->start); BUG_ON(next->vm_start > vms->end); #endif - } for_each_vma_range(*(vms->vmi), next, vms->end); + } + + vms->next = vma_next(vms->vmi); #if defined(CONFIG_DEBUG_VM_MAPLE_TREE) /* Make sure no VMAs are about to be lost. */ diff --git a/mm/vma.h b/mm/vma.h index cb67acf59012..cbf55e0e0c4f 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -33,12 +33,18 @@ struct vma_munmap_struct { struct vma_iterator *vmi; struct mm_struct *mm; struct vm_area_struct *vma; /* The first vma to munmap */ + struct vm_area_struct *prev; /* vma before the munmap area */ + struct vm_area_struct *next; /* vma after the munmap area */ struct list_head *uf; /* Userfaultfd list_head */ unsigned long start; /* Aligned start addr (inclusive) */ unsigned long end; /* Aligned end addr (exclusive) */ int vma_count; /* Number of vmas that will be removed */ unsigned long nr_pages; /* Number of pages being removed */ unsigned long locked_vm; /* Number of locked pages */ + unsigned long nr_accounted; /* Number of VM_ACCOUNT pages */ + unsigned long exec_vm; + unsigned long stack_vm; + unsigned long data_vm; bool unlock; /* Unlock after the munmap */ };