diff mbox series

[RFC,2/2] mm/page_alloc: free_pcppages_bulk clean up

Message ID 20230817-free_pcppages_bulk-v1-2-c14574a9f80c@kernel.org (mailing list archive)
State New
Headers show
Series mm/page_alloc: free_pcppages_bulk safeguard | expand

Commit Message

Chris Li Aug. 18, 2023, 6:05 a.m. UTC
This patch makes no functional change; it is a pure clean-up.

It removes the min_pindex and max_pindex variables and replaces them with a
simpler loop.

It uses list_for_each_entry_safe_reverse() to replace the loop over
list_last_entry(). This produces slightly better machine code.

Signed-off-by: Chris Li <chrisl@kernel.org>
---
 mm/page_alloc.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

Comments

kernel test robot Aug. 24, 2023, 6:28 a.m. UTC | #1
hi, Chris Li,

We noticed the statement "This patch does not have functional change" in the commit message.
However, the issue below keeps reproducing randomly across 100 runs of this commit,
while the parent commit stays clean over the same number of runs.

55aea7978bd8df28 3373e582e78e8aaaf8977b42bc8
---------------- ---------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
           :100         12%          12:100   dmesg.BUG:kernel_NULL_pointer_dereference,address
           :100          3%           3:100   dmesg.BUG:unable_to_handle_page_fault_for_address
           :100         16%          16:100   dmesg.EIP:free_pcppages_bulk
           :100         15%          15:100   dmesg.Kernel_panic-not_syncing:Fatal_exception
           :100          1%           1:100   dmesg.Kernel_panic-not_syncing:Fatal_exception_in_interrupt
           :100         16%          16:100   dmesg.Oops:#[##]
           :100         16%          16:100   dmesg.boot_failures

and since there is
[   15.898250][    C0] EIP: free_pcppages_bulk+0x7d/0x200
and free_pcppages_bulk() is changed in this commit, we just report this to you.
FYI


Hello,

kernel test robot noticed "BUG:kernel_NULL_pointer_dereference,address" on:

commit: 3373e582e78e8aaaf8977b42bc8edd8487310033 ("[PATCH RFC 2/2] mm/page_alloc: free_pcppages_bulk clean up")
url: https://github.com/intel-lab-lkp/linux/commits/Chris-Li/mm-page_alloc-safeguard-free_pcppages_bulk/20230818-140815
patch link: https://lore.kernel.org/all/20230817-free_pcppages_bulk-v1-2-c14574a9f80c@kernel.org/
patch subject: [PATCH RFC 2/2] mm/page_alloc: free_pcppages_bulk clean up

in testcase: boot

compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202308241221.cc5ac84a-oliver.sang@intel.com



[   15.890907][    C0] BUG: kernel NULL pointer dereference, address: 00000005
[   15.891555][    C0] #PF: supervisor read access in kernel mode
[   15.892037][    C0] #PF: error_code(0x0000) - not-present page
[   15.893161][    C0] *pdpt = 000000002c9e7001 *pde = 0000000000000000
[   15.894410][    C0] Oops: 0000 [#1] SMP PTI
[   15.895385][    C0] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G            E      6.5.0-rc4-00242-g3373e582e78e #1
[   15.896801][    C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   15.898250][    C0] EIP: free_pcppages_bulk+0x7d/0x200
[   15.899281][    C0] Code: 8d 34 c6 89 75 e4 83 f8 0c 0f 84 ee 00 00 00 ba ab aa aa aa f7 e2 b8 01 00 00 00 89 d1 d1 e9 d3 e0 89 45 e0 8b 45 e4 8b 50 04 <8b> 72 04
 8d 5a fc 83 ee 04 39 c2 0f 84 0a 01 00 00 89 4d e8 eb 28
[   15.914130][    C0] EAX: e4c7520c EBX: e4c9f510 ECX: 55555555 EDX: 00000001
[   15.915477][    C0] ESI: e4c7520c EDI: e4c75200 EBP: c1819d98 ESP: c1819d60
[   15.916755][    C0] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00210006
[   15.918067][    C0] CR0: 80050033 CR2: 00000005 CR3: 2ca6a000 CR4: 000406f0
[   15.919278][    C0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[   15.920468][    C0] DR6: fffe0ff0 DR7: 00000400
[   15.921446][    C0] Call Trace:
[   15.922298][    C0]  <SOFTIRQ>
[   15.923149][    C0]  ? show_regs+0x55/0x80
[   15.924069][    C0]  ? __die+0x1d/0x80
[   15.924934][    C0]  ? page_fault_oops+0x65/0xc0
[   15.925890][    C0]  ? kernelmode_fixup_or_oops+0x73/0x100
[   15.926986][    C0]  ? __bad_area_nosemaphore+0xdc/0x1c0
[   15.928086][    C0]  ? bad_area_nosemaphore+0xf/0x40
[   15.929061][    C0]  ? do_user_addr_fault+0x1ac/0x3c0
[   15.930036][    C0]  ? exc_page_fault+0x51/0x140
[   15.930957][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
[   15.931997][    C0]  ? handle_exception+0x133/0x133
[   15.932938][    C0]  ? pmd_clear_huge+0x7b/0x80
[   15.934885][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
[   15.935963][    C0]  ? free_pcppages_bulk+0x7d/0x200
[   15.936915][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
[   15.937954][    C0]  ? free_pcppages_bulk+0x7d/0x200
[   15.938876][    C0]  free_unref_page_commit+0x120/0x180
[   15.939826][    C0]  free_unref_page+0xe7/0x100
[   15.940688][    C0]  __free_pages+0x87/0xc0
[   15.941519][    C0]  __free_slab+0xa1/0x100
[   15.942383][    C0]  free_slab+0x27/0xc0
[   15.943187][    C0]  discard_slab+0x38/0x40
[   15.944014][    C0]  __unfreeze_partials+0x20c/0x240
[   15.946439][    C0]  put_cpu_partial+0x5b/0x80
[   15.947347][    C0]  __slab_free+0x287/0x380
[   15.948207][    C0]  ? __mod_memcg_lruvec_state+0x3e/0x80
[   15.949138][    C0]  kmem_cache_free+0x329/0x340
[   15.950003][    C0]  ? mt_free_rcu+0x10/0x40
[   15.950805][    C0]  ? free_task+0x4d/0x80
[   15.951594][    C0]  ? mt_free_rcu+0x10/0x40
[   15.952390][    C0]  mt_free_rcu+0x10/0x40
[   15.953181][    C0]  rcu_do_batch+0x158/0x440
[   15.953998][    C0]  rcu_core+0xce/0x1c0
[   15.954751][    C0]  rcu_core_si+0xd/0x40
[   15.955512][    C0]  __do_softirq+0xad/0x233
[   15.956293][    C0]  ? __lock_text_end+0x3/0x3
[   15.957084][    C0]  call_on_stack+0x45/0x80
[   15.957871][    C0]  </SOFTIRQ>
[   15.958525][    C0]  ? irq_exit_rcu+0x6a/0xc0
[   15.959284][    C0]  ? sysvec_apic_timer_interrupt+0x27/0x40
[   15.960136][    C0]  ? handle_exception+0x133/0x133
[   15.960900][    C0]  ? alarm_handle_timer+0xfb/0x100
[   15.961669][    C0]  ? sysvec_call_function_single+0x40/0x40
[   15.962542][    C0]  ? default_idle+0xb/0x40
[   15.963234][    C0]  ? sysvec_call_function_single+0x40/0x40
[   15.964026][    C0]  ? default_idle+0xb/0x40
[   15.964703][    C0]  ? arch_cpu_idle+0x8/0x40
[   15.965379][    C0]  ? default_idle_call+0x2a/0xc0
[   15.967867][    C0]  ? cpuidle_idle_call+0x122/0x180
[   15.968680][    C0]  ? do_idle+0x79/0xc0
[   15.969357][    C0]  ? cpu_startup_entry+0x25/0x40
[   15.970106][    C0]  ? rest_init+0x96/0xc0
[   15.970770][    C0]  ? arch_call_rest_init+0xd/0x80
[   15.971505][    C0]  ? start_kernel+0x347/0x480
[   15.972209][    C0]  ? early_idt_handler_common+0x44/0x44
[   15.972969][    C0]  ? i386_start_kernel+0x48/0x80
[   15.973678][    C0]  ? startup_32_smp+0x156/0x158
[   15.974397][    C0] Modules linked in: intel_rapl_msr(E) intel_rapl_common(E) ata_generic(E) ppdev(E) crc32_pclmul(E) crc32c_intel(E) aesni_intel(E) ipmi_devintf(
E) ipmi_msghandler(E) crypto_simd(E) ata_piix(E) cryptd(E) rapl(E) i2c_piix4(E) psmouse(E) evdev(E) serio_raw(E) bochs(E) drm_vram_helper(E) drm_kms_helper(E) drm_tt
m_helper(E) ttm(E) libata(E) parport_pc(E) floppy(E) parport(E) qemu_fw_cfg(E) button(E) drm(E) configfs(E) fuse(E) autofs4(E)
[   15.978905][    C0] CR2: 0000000000000005
[   15.979621][    C0] ---[ end trace 0000000000000000 ]---
[   15.980419][    C0] EIP: free_pcppages_bulk+0x7d/0x200
[   15.981222][    C0] Code: 8d 34 c6 89 75 e4 83 f8 0c 0f 84 ee 00 00 00 ba ab aa aa aa f7 e2 b8 01 00 00 00 89 d1 d1 e9 d3 e0 89 45 e0 8b 45 e4 8b 50 04 <8b> 72 04
 8d 5a fc 83 ee 04 39 c2 0f 84 0a 01 00 00 89 4d e8 eb 28
[   15.983663][    C0] EAX: e4c7520c EBX: e4c9f510 ECX: 55555555 EDX: 00000001
[   15.984705][    C0] ESI: e4c7520c EDI: e4c75200 EBP: c1819d98 ESP: c1819d60
[   15.985738][    C0] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00210006
[   15.986822][    C0] CR0: 80050033 CR2: 00000005 CR3: 2ca6a000 CR4: 000406f0
[   15.987832][    C0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
[   15.988839][    C0] DR6: fffe0ff0 DR7: 00000400
[   15.989679][    C0] Kernel panic - not syncing: Fatal exception in interrupt
[   15.996118][    C0] Kernel Offset: disabled



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230824/202308241221.cc5ac84a-oliver.sang@intel.com
Chris Li Aug. 24, 2023, 3:25 p.m. UTC | #2
Hi Oliver,

Indeed, that is my bad. Thanks for reporting it.

The patch has been sitting in my tree for a very long time.
When I adapted it to a later kernel, I accidentally dropped this chunk:

-                       if (!list_empty(list))
-                               break;

I think that is what is missing. I will address that in the V2 and do
more testing before I send it out.

On Wed, Aug 23, 2023, kernel test robot <oliver.sang@intel.com> wrote:

>
> in testcase: boot
>
> compiler: gcc-12
> test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
>
> (please refer to attached dmesg/kmsg for entire log/backtrace)

The kernel test robot is very interesting.
I see that this is how the qemu system is started.

How do I invoke the test once I have qemu up and running with my testing kernel?

I want to replicate it before I send out the V2 version.

Chris

>
>
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <oliver.sang@intel.com>
> | Closes: https://lore.kernel.org/oe-lkp/202308241221.cc5ac84a-oliver.sang@intel.com
>
>
>
> [   15.890907][    C0] BUG: kernel NULL pointer dereference, address: 00000005
> [   15.891555][    C0] #PF: supervisor read access in kernel mode
> [   15.892037][    C0] #PF: error_code(0x0000) - not-present page
> [   15.893161][    C0] *pdpt = 000000002c9e7001 *pde = 0000000000000000
> [   15.894410][    C0] Oops: 0000 [#1] SMP PTI
> [   15.895385][    C0] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G            E      6.5.0-rc4-00242-g3373e582e78e #1
> [   15.896801][    C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [   15.898250][    C0] EIP: free_pcppages_bulk+0x7d/0x200
> [   15.899281][    C0] Code: 8d 34 c6 89 75 e4 83 f8 0c 0f 84 ee 00 00 00 ba ab aa aa aa f7 e2 b8 01 00 00 00 89 d1 d1 e9 d3 e0 89 45 e0 8b 45 e4 8b 50 04 <8b> 72 04
>  8d 5a fc 83 ee 04 39 c2 0f 84 0a 01 00 00 89 4d e8 eb 28
> [   15.914130][    C0] EAX: e4c7520c EBX: e4c9f510 ECX: 55555555 EDX: 00000001
> [   15.915477][    C0] ESI: e4c7520c EDI: e4c75200 EBP: c1819d98 ESP: c1819d60
> [   15.916755][    C0] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00210006
> [   15.918067][    C0] CR0: 80050033 CR2: 00000005 CR3: 2ca6a000 CR4: 000406f0
> [   15.919278][    C0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [   15.920468][    C0] DR6: fffe0ff0 DR7: 00000400
> [   15.921446][    C0] Call Trace:
> [   15.922298][    C0]  <SOFTIRQ>
> [   15.923149][    C0]  ? show_regs+0x55/0x80
> [   15.924069][    C0]  ? __die+0x1d/0x80
> [   15.924934][    C0]  ? page_fault_oops+0x65/0xc0
> [   15.925890][    C0]  ? kernelmode_fixup_or_oops+0x73/0x100
> [   15.926986][    C0]  ? __bad_area_nosemaphore+0xdc/0x1c0
> [   15.928086][    C0]  ? bad_area_nosemaphore+0xf/0x40
> [   15.929061][    C0]  ? do_user_addr_fault+0x1ac/0x3c0
> [   15.930036][    C0]  ? exc_page_fault+0x51/0x140
> [   15.930957][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
> [   15.931997][    C0]  ? handle_exception+0x133/0x133
> [   15.932938][    C0]  ? pmd_clear_huge+0x7b/0x80
> [   15.934885][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
> [   15.935963][    C0]  ? free_pcppages_bulk+0x7d/0x200
> [   15.936915][    C0]  ? pvclock_clocksource_read_nowd+0x140/0x140
> [   15.937954][    C0]  ? free_pcppages_bulk+0x7d/0x200
> [   15.938876][    C0]  free_unref_page_commit+0x120/0x180
> [   15.939826][    C0]  free_unref_page+0xe7/0x100
> [   15.940688][    C0]  __free_pages+0x87/0xc0
> [   15.941519][    C0]  __free_slab+0xa1/0x100
> [   15.942383][    C0]  free_slab+0x27/0xc0
> [   15.943187][    C0]  discard_slab+0x38/0x40
> [   15.944014][    C0]  __unfreeze_partials+0x20c/0x240
> [   15.946439][    C0]  put_cpu_partial+0x5b/0x80
> [   15.947347][    C0]  __slab_free+0x287/0x380
> [   15.948207][    C0]  ? __mod_memcg_lruvec_state+0x3e/0x80
> [   15.949138][    C0]  kmem_cache_free+0x329/0x340
> [   15.950003][    C0]  ? mt_free_rcu+0x10/0x40
> [   15.950805][    C0]  ? free_task+0x4d/0x80
> [   15.951594][    C0]  ? mt_free_rcu+0x10/0x40
> [   15.952390][    C0]  mt_free_rcu+0x10/0x40
> [   15.953181][    C0]  rcu_do_batch+0x158/0x440
> [   15.953998][    C0]  rcu_core+0xce/0x1c0
> [   15.954751][    C0]  rcu_core_si+0xd/0x40
> [   15.955512][    C0]  __do_softirq+0xad/0x233
> [   15.956293][    C0]  ? __lock_text_end+0x3/0x3
> [   15.957084][    C0]  call_on_stack+0x45/0x80
> [   15.957871][    C0]  </SOFTIRQ>
> [   15.958525][    C0]  ? irq_exit_rcu+0x6a/0xc0
> [   15.959284][    C0]  ? sysvec_apic_timer_interrupt+0x27/0x40
> [   15.960136][    C0]  ? handle_exception+0x133/0x133
> [   15.960900][    C0]  ? alarm_handle_timer+0xfb/0x100
> [   15.961669][    C0]  ? sysvec_call_function_single+0x40/0x40
> [   15.962542][    C0]  ? default_idle+0xb/0x40
> [   15.963234][    C0]  ? sysvec_call_function_single+0x40/0x40
> [   15.964026][    C0]  ? default_idle+0xb/0x40
> [   15.964703][    C0]  ? arch_cpu_idle+0x8/0x40
> [   15.965379][    C0]  ? default_idle_call+0x2a/0xc0
> [   15.967867][    C0]  ? cpuidle_idle_call+0x122/0x180
> [   15.968680][    C0]  ? do_idle+0x79/0xc0
> [   15.969357][    C0]  ? cpu_startup_entry+0x25/0x40
> [   15.970106][    C0]  ? rest_init+0x96/0xc0
> [   15.970770][    C0]  ? arch_call_rest_init+0xd/0x80
> [   15.971505][    C0]  ? start_kernel+0x347/0x480
> [   15.972209][    C0]  ? early_idt_handler_common+0x44/0x44
> [   15.972969][    C0]  ? i386_start_kernel+0x48/0x80
> [   15.973678][    C0]  ? startup_32_smp+0x156/0x158
> [   15.974397][    C0] Modules linked in: intel_rapl_msr(E) intel_rapl_common(E) ata_generic(E) ppdev(E) crc32_pclmul(E) crc32c_intel(E) aesni_intel(E) ipmi_devintf(
> E) ipmi_msghandler(E) crypto_simd(E) ata_piix(E) cryptd(E) rapl(E) i2c_piix4(E) psmouse(E) evdev(E) serio_raw(E) bochs(E) drm_vram_helper(E) drm_kms_helper(E) drm_tt
> m_helper(E) ttm(E) libata(E) parport_pc(E) floppy(E) parport(E) qemu_fw_cfg(E) button(E) drm(E) configfs(E) fuse(E) autofs4(E)
> [   15.978905][    C0] CR2: 0000000000000005
> [   15.979621][    C0] ---[ end trace 0000000000000000 ]---
> [   15.980419][    C0] EIP: free_pcppages_bulk+0x7d/0x200
> [   15.981222][    C0] Code: 8d 34 c6 89 75 e4 83 f8 0c 0f 84 ee 00 00 00 ba ab aa aa aa f7 e2 b8 01 00 00 00 89 d1 d1 e9 d3 e0 89 45 e0 8b 45 e4 8b 50 04 <8b> 72 04
>  8d 5a fc 83 ee 04 39 c2 0f 84 0a 01 00 00 89 4d e8 eb 28
> [   15.983663][    C0] EAX: e4c7520c EBX: e4c9f510 ECX: 55555555 EDX: 00000001
> [   15.984705][    C0] ESI: e4c7520c EDI: e4c75200 EBP: c1819d98 ESP: c1819d60
> [   15.985738][    C0] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 EFLAGS: 00210006
> [   15.986822][    C0] CR0: 80050033 CR2: 00000005 CR3: 2ca6a000 CR4: 000406f0
> [   15.987832][    C0] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
> [   15.988839][    C0] DR6: fffe0ff0 DR7: 00000400
> [   15.989679][    C0] Kernel panic - not syncing: Fatal exception in interrupt
> [   15.996118][    C0] Kernel Offset: disabled
>
>
>
> The kernel config and materials to reproduce are available at:
> https://download.01.org/0day-ci/archive/20230824/202308241221.cc5ac84a-oliver.sang@intel.com
>
>
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
>
>
diff mbox series

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 347cb93081a02..d64d0f5ec70b4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1209,11 +1209,9 @@  static void free_pcppages_bulk(struct zone *zone, int count,
 					int pindex)
 {
 	unsigned long flags;
-	int min_pindex = 0;
-	int max_pindex = NR_PCP_LISTS - 1;
 	unsigned int order;
 	bool isolated_pageblocks;
-	struct page *page;
+	int i;
 
 	/* Ensure requested pindex is drained first. */
 	pindex = pindex - 1;
@@ -1221,31 +1219,18 @@  static void free_pcppages_bulk(struct zone *zone, int count,
 	spin_lock_irqsave(&zone->lock, flags);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
-	while (count > 0) {
+	for (i = 0; i < NR_PCP_LISTS; i++, pindex++) {
 		struct list_head *list;
 		int nr_pages;
+		struct page *page, *next;
 
-		/* Remove pages from lists in a round-robin fashion. */
-		do {
-			if (++pindex > max_pindex)
-				pindex = min_pindex;
-			list = &pcp->lists[pindex];
-			if (!list_empty(list))
-				break;
-
-			if (pindex == max_pindex)
-				max_pindex--;
-			if (pindex == min_pindex)
-				min_pindex++;
-		} while (1);
-
+		if (pindex == NR_PCP_LISTS)
+			pindex = 0;
+		list = pcp->lists + pindex;
 		order = pindex_to_order(pindex);
 		nr_pages = 1 << order;
-		do {
-			int mt;
-
-			page = list_last_entry(list, struct page, pcp_list);
-			mt = get_pcppage_migratetype(page);
+		list_for_each_entry_safe_reverse(page, next, list, lru) {
+			int mt = get_pcppage_migratetype(page);
 
 			/* must delete to avoid corrupting pcp list */
 			list_del(&page->pcp_list);
@@ -1260,9 +1245,12 @@  static void free_pcppages_bulk(struct zone *zone, int count,
 
 			__free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
 			trace_mm_page_pcpu_drain(page, order, mt);
-		} while (count > 0 && pcp->count > 0 && !list_empty(list));
-	}
 
+			if (count <= 0 || pcp->count <= 0)
+				goto out;
+		}
+	}
+out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }