
[mm-unstable,v1,5/5] mm/swap: remove boilerplate

Message ID 20240711021317.596178-6-yuzhao@google.com (mailing list archive)
State New
Series mm/swap: remove boilerplate

Commit Message

Yu Zhao July 11, 2024, 2:13 a.m. UTC
Remove boilerplate by using a macro to choose the corresponding lock
and handler for each folio_batch in cpu_fbatches.

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 mm/swap.c | 107 +++++++++++++++++++-----------------------------------
 1 file changed, 37 insertions(+), 70 deletions(-)

Comments

Barry Song July 26, 2024, 5:48 a.m. UTC | #1
On Thu, Jul 11, 2024 at 2:15 PM Yu Zhao <yuzhao@google.com> wrote:
>
> Remove boilerplate by using a macro to choose the corresponding lock
> and handler for each folio_batch in cpu_fbatches.
>
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> ---
>  mm/swap.c | 107 +++++++++++++++++++-----------------------------------
>  1 file changed, 37 insertions(+), 70 deletions(-)
>
> diff --git a/mm/swap.c b/mm/swap.c
> index 4a66d2f87f26..342ff4e39ba4 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -220,16 +220,45 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
>         folios_put(fbatch);
>  }
>
> -static void folio_batch_add_and_move(struct folio_batch *fbatch,
> -               struct folio *folio, move_fn_t move_fn)
> +static void __folio_batch_add_and_move(struct folio_batch *fbatch,
> +               struct folio *folio, move_fn_t move_fn,
> +               bool on_lru, bool disable_irq)
>  {
> +       unsigned long flags;
> +
> +       folio_get(folio);
> +
> +       if (on_lru && !folio_test_clear_lru(folio)) {
> +               folio_put(folio);
> +               return;
> +       }
> +
>         if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
>             !lru_cache_disabled())
>                 return;
>
> +       if (disable_irq)
> +               local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> +       else
> +               local_lock(&cpu_fbatches.lock);
> +
>         folio_batch_move_lru(fbatch, move_fn);
> +
> +       if (disable_irq)
> +               local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> +       else
> +               local_unlock(&cpu_fbatches.lock);
>  }
>
> +#define folio_batch_add_and_move(folio, op, on_lru)                                            \
> +       __folio_batch_add_and_move(                                                             \
> +               this_cpu_ptr(&cpu_fbatches.op),                                                 \
> +               folio,                                                                          \
> +               op,                                                                             \
> +               on_lru,                                                                         \
> +               offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)     \
> +       )
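
For reference, hand-expanding the macro for one caller (my reading of it,
not compiler output), folio_batch_add_and_move(folio, lru_move_tail, true)
in folio_rotate_reclaimable() becomes roughly:

	__folio_batch_add_and_move(
		this_cpu_ptr(&cpu_fbatches.lru_move_tail),	/* per-CPU batch for this op */
		folio,
		lru_move_tail,					/* move_fn_t handler */
		true,						/* on_lru */
		/* picks the IRQ-disabling lock for batches laid out
		 * after lock_irq in struct cpu_fbatches: */
		offsetof(struct cpu_fbatches, lru_move_tail) >
			offsetof(struct cpu_fbatches, lock_irq));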

I am running into this BUG, is it relevant?

/ # [   64.908801] check_preemption_disabled: 1804 callbacks suppressed
[   64.908915] BUG: using smp_processor_id() in preemptible [00000000] code: jbd2/vda-8/96
[   64.909912] caller is debug_smp_processor_id+0x20/0x30
[   64.911743] CPU: 0 UID: 0 PID: 96 Comm: jbd2/vda-8 Not tainted 6.10.0-gef32eccacce2 #59
[   64.912373] Hardware name: linux,dummy-virt (DT)
[   64.912741] Call trace:
[   64.913048]  dump_backtrace+0x9c/0x100
[   64.913414]  show_stack+0x20/0x38
[   64.913761]  dump_stack_lvl+0xc4/0x150
[   64.914197]  dump_stack+0x18/0x28
[   64.914557]  check_preemption_disabled+0xd8/0x120
[   64.914944]  debug_smp_processor_id+0x20/0x30
[   64.915321]  folio_add_lru+0x30/0xa8
[   64.915680]  filemap_add_folio+0xe4/0x118
[   64.916082]  __filemap_get_folio+0x178/0x450
[   64.916455]  __getblk_slow+0xb0/0x310
[   64.916816]  bdev_getblk+0x94/0xc0
[   64.917169]  jbd2_journal_get_descriptor_buffer+0x6c/0x1b0
[   64.917590]  jbd2_journal_commit_transaction+0x7f0/0x1c88
[   64.917994]  kjournald2+0xd4/0x278
[   64.918344]  kthread+0x11c/0x128
[   64.918693]  ret_from_fork+0x10/0x20
[   64.928277] BUG: using smp_processor_id() in preemptible [00000000] code: jbd2/vda-8/96
[   64.928878] caller is debug_smp_processor_id+0x20/0x30
[   64.929381] CPU: 0 UID: 0 PID: 96 Comm: jbd2/vda-8 Not tainted 6.10.0-gef32eccacce2 #59
[   64.929886] Hardware name: linux,dummy-virt (DT)
[   64.930252] Call trace:
[   64.930544]  dump_backtrace+0x9c/0x100
[   64.930907]  show_stack+0x20/0x38
[   64.931255]  dump_stack_lvl+0xc4/0x150
[   64.931616]  dump_stack+0x18/0x28
[   64.932022]  check_preemption_disabled+0xd8/0x120
[   64.932486]  debug_smp_processor_id+0x20/0x30
[   64.933023]  folio_add_lru+0x30/0xa8
[   64.933523]  filemap_add_folio+0xe4/0x118
[   64.933892]  __filemap_get_folio+0x178/0x450
[   64.934265]  __getblk_slow+0xb0/0x310
[   64.934626]  bdev_getblk+0x94/0xc0
[   64.934977]  jbd2_journal_get_descriptor_buffer+0x6c/0x1b0
[   64.935418]  journal_submit_commit_record.part.0.constprop.0+0x48/0x288
[   64.935919]  jbd2_journal_commit_transaction+0x1590/0x1c88
[   64.936519]  kjournald2+0xd4/0x278
[   64.936908]  kthread+0x11c/0x128
[   64.937323]  ret_from_fork+0x10/0x20

> +
>  static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
>  {
>         if (folio_test_unevictable(folio))
> @@ -250,23 +279,11 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
>   */
>  void folio_rotate_reclaimable(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -       unsigned long flags;
> -
>         if (folio_test_locked(folio) || folio_test_dirty(folio) ||
>             folio_test_unevictable(folio))
>                 return;
>
> -       folio_get(folio);
> -       if (!folio_test_clear_lru(folio)) {
> -               folio_put(folio);
> -               return;
> -       }
> -
> -       local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_move_tail);
> -       folio_batch_add_and_move(fbatch, folio, lru_move_tail);
> -       local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> +       folio_batch_add_and_move(folio, lru_move_tail, true);
>  }
>
>  void lru_note_cost(struct lruvec *lruvec, bool file,
> @@ -355,21 +372,10 @@ static void folio_activate_drain(int cpu)
>
>  void folio_activate(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -
>         if (folio_test_active(folio) || folio_test_unevictable(folio))
>                 return;
>
> -       folio_get(folio);
> -       if (!folio_test_clear_lru(folio)) {
> -               folio_put(folio);
> -               return;
> -       }
> -
> -       local_lock(&cpu_fbatches.lock);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_activate);
> -       folio_batch_add_and_move(fbatch, folio, lru_activate);
> -       local_unlock(&cpu_fbatches.lock);
> +       folio_batch_add_and_move(folio, lru_activate, true);
>  }
>
>  #else
> @@ -513,8 +519,6 @@ EXPORT_SYMBOL(folio_mark_accessed);
>   */
>  void folio_add_lru(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -
>         VM_BUG_ON_FOLIO(folio_test_active(folio) &&
>                         folio_test_unevictable(folio), folio);
>         VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> @@ -524,11 +528,7 @@ void folio_add_lru(struct folio *folio)
>             lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
>                 folio_set_active(folio);
>
> -       folio_get(folio);
> -       local_lock(&cpu_fbatches.lock);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
> -       folio_batch_add_and_move(fbatch, folio, lru_add);
> -       local_unlock(&cpu_fbatches.lock);
> +       folio_batch_add_and_move(folio, lru_add, false);
>  }
>  EXPORT_SYMBOL(folio_add_lru);
>
> @@ -702,22 +702,11 @@ void lru_add_drain_cpu(int cpu)
>   */
>  void deactivate_file_folio(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -
>         /* Deactivating an unevictable folio will not accelerate reclaim */
>         if (folio_test_unevictable(folio))
>                 return;
>
> -       folio_get(folio);
> -       if (!folio_test_clear_lru(folio)) {
> -               folio_put(folio);
> -               return;
> -       }
> -
> -       local_lock(&cpu_fbatches.lock);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
> -       folio_batch_add_and_move(fbatch, folio, lru_deactivate_file);
> -       local_unlock(&cpu_fbatches.lock);
> +       folio_batch_add_and_move(folio, lru_deactivate_file, true);
>  }
>
>  /*
> @@ -730,21 +719,10 @@ void deactivate_file_folio(struct folio *folio)
>   */
>  void folio_deactivate(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -
>         if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled()))
>                 return;
>
> -       folio_get(folio);
> -       if (!folio_test_clear_lru(folio)) {
> -               folio_put(folio);
> -               return;
> -       }
> -
> -       local_lock(&cpu_fbatches.lock);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
> -       folio_batch_add_and_move(fbatch, folio, lru_deactivate);
> -       local_unlock(&cpu_fbatches.lock);
> +       folio_batch_add_and_move(folio, lru_deactivate, true);
>  }
>
>  /**
> @@ -756,22 +734,11 @@ void folio_deactivate(struct folio *folio)
>   */
>  void folio_mark_lazyfree(struct folio *folio)
>  {
> -       struct folio_batch *fbatch;
> -
>         if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
>             folio_test_swapcache(folio) || folio_test_unevictable(folio))
>                 return;
>
> -       folio_get(folio);
> -       if (!folio_test_clear_lru(folio)) {
> -               folio_put(folio);
> -               return;
> -       }
> -
> -       local_lock(&cpu_fbatches.lock);
> -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
> -       folio_batch_add_and_move(fbatch, folio, lru_lazyfree);
> -       local_unlock(&cpu_fbatches.lock);
> +       folio_batch_add_and_move(folio, lru_lazyfree, true);
>  }
>
>  void lru_add_drain(void)
> --
> 2.45.2.803.g4e1b14247a-goog
>
>

Thanks
Barry
Barry Song July 26, 2024, 5:56 a.m. UTC | #2
On Fri, Jul 26, 2024 at 5:48 PM Barry Song <21cnbao@gmail.com> wrote:
>
> On Thu, Jul 11, 2024 at 2:15 PM Yu Zhao <yuzhao@google.com> wrote:
> >
> > Remove boilerplate by using a macro to choose the corresponding lock
> > and handler for each folio_batch in cpu_fbatches.
> >
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > ---
> >  mm/swap.c | 107 +++++++++++++++++++-----------------------------------
> >  1 file changed, 37 insertions(+), 70 deletions(-)
> >
> > diff --git a/mm/swap.c b/mm/swap.c
> > index 4a66d2f87f26..342ff4e39ba4 100644
> > --- a/mm/swap.c
> > +++ b/mm/swap.c
> > @@ -220,16 +220,45 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
> >         folios_put(fbatch);
> >  }
> >
> > -static void folio_batch_add_and_move(struct folio_batch *fbatch,
> > -               struct folio *folio, move_fn_t move_fn)
> > +static void __folio_batch_add_and_move(struct folio_batch *fbatch,
> > +               struct folio *folio, move_fn_t move_fn,
> > +               bool on_lru, bool disable_irq)
> >  {
> > +       unsigned long flags;
> > +
> > +       folio_get(folio);
> > +
> > +       if (on_lru && !folio_test_clear_lru(folio)) {
> > +               folio_put(folio);
> > +               return;
> > +       }
> > +
> >         if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
> >             !lru_cache_disabled())
> >                 return;
> >
> > +       if (disable_irq)
> > +               local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> > +       else
> > +               local_lock(&cpu_fbatches.lock);
> > +
> >         folio_batch_move_lru(fbatch, move_fn);
> > +
> > +       if (disable_irq)
> > +               local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> > +       else
> > +               local_unlock(&cpu_fbatches.lock);
> >  }
> >
> > +#define folio_batch_add_and_move(folio, op, on_lru)                                            \
> > +       __folio_batch_add_and_move(                                                             \
> > +               this_cpu_ptr(&cpu_fbatches.op),                                                 \
> > +               folio,                                                                          \
> > +               op,                                                                             \
> > +               on_lru,                                                                         \
> > +               offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)     \
> > +       )
>
> I am running into this BUG, is it relevant?
>
> / # [   64.908801] check_preemption_disabled: 1804 callbacks suppressed
> [   64.908915] BUG: using smp_processor_id() in preemptible [00000000] code: jbd2/vda-8/96
> [   64.909912] caller is debug_smp_processor_id+0x20/0x30
> [   64.911743] CPU: 0 UID: 0 PID: 96 Comm: jbd2/vda-8 Not tainted 6.10.0-gef32eccacce2 #59
> [   64.912373] Hardware name: linux,dummy-virt (DT)
> [   64.912741] Call trace:
> [   64.913048]  dump_backtrace+0x9c/0x100
> [   64.913414]  show_stack+0x20/0x38
> [   64.913761]  dump_stack_lvl+0xc4/0x150
> [   64.914197]  dump_stack+0x18/0x28
> [   64.914557]  check_preemption_disabled+0xd8/0x120
> [   64.914944]  debug_smp_processor_id+0x20/0x30
> [   64.915321]  folio_add_lru+0x30/0xa8
> [   64.915680]  filemap_add_folio+0xe4/0x118
> [   64.916082]  __filemap_get_folio+0x178/0x450
> [   64.916455]  __getblk_slow+0xb0/0x310
> [   64.916816]  bdev_getblk+0x94/0xc0
> [   64.917169]  jbd2_journal_get_descriptor_buffer+0x6c/0x1b0
> [   64.917590]  jbd2_journal_commit_transaction+0x7f0/0x1c88
> [   64.917994]  kjournald2+0xd4/0x278
> [   64.918344]  kthread+0x11c/0x128
> [   64.918693]  ret_from_fork+0x10/0x20
> [   64.928277] BUG: using smp_processor_id() in preemptible [00000000] code: jbd2/vda-8/96
> [   64.928878] caller is debug_smp_processor_id+0x20/0x30
> [   64.929381] CPU: 0 UID: 0 PID: 96 Comm: jbd2/vda-8 Not tainted 6.10.0-gef32eccacce2 #59
> [   64.929886] Hardware name: linux,dummy-virt (DT)
> [   64.930252] Call trace:
> [   64.930544]  dump_backtrace+0x9c/0x100
> [   64.930907]  show_stack+0x20/0x38
> [   64.931255]  dump_stack_lvl+0xc4/0x150
> [   64.931616]  dump_stack+0x18/0x28
> [   64.932022]  check_preemption_disabled+0xd8/0x120
> [   64.932486]  debug_smp_processor_id+0x20/0x30
> [   64.933023]  folio_add_lru+0x30/0xa8
> [   64.933523]  filemap_add_folio+0xe4/0x118
> [   64.933892]  __filemap_get_folio+0x178/0x450
> [   64.934265]  __getblk_slow+0xb0/0x310
> [   64.934626]  bdev_getblk+0x94/0xc0
> [   64.934977]  jbd2_journal_get_descriptor_buffer+0x6c/0x1b0
> [   64.935418]  journal_submit_commit_record.part.0.constprop.0+0x48/0x288
> [   64.935919]  jbd2_journal_commit_transaction+0x1590/0x1c88
> [   64.936519]  kjournald2+0xd4/0x278
> [   64.936908]  kthread+0x11c/0x128
> [   64.937323]  ret_from_fork+0x10/0x20

This removes the BUG complaint, but I'm unsure if it's the correct fix:

diff --git a/mm/swap.c b/mm/swap.c
index 342ff4e39ba4..a2781edeceef 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -252,7 +252,7 @@ static void __folio_batch_add_and_move(struct folio_batch *fbatch,

 #define folio_batch_add_and_move(folio, op, on_lru)						\
 	__folio_batch_add_and_move(								\
-		this_cpu_ptr(&cpu_fbatches.op),							\
+		raw_cpu_ptr(&cpu_fbatches.op),							\
		folio,										\
		op,										\
		on_lru,										\

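If I read the percpu helpers right, the only difference is the debug check:
this_cpu_ptr() goes through smp_processor_id(), which CONFIG_DEBUG_PREEMPT
turns into debug_smp_processor_id() and which warns when preemption is
enabled, while raw_cpu_ptr() skips that check. So this silences the warning,
but the situation it warns about stays the same, roughly:

	/* sketch, not kernel code: preemption is still enabled here */
	struct folio_batch *fbatch = raw_cpu_ptr(&cpu_fbatches.lru_add);
	/*
	 * The task can migrate at this point, so by the time fbatch is
	 * used under cpu_fbatches.lock it may belong to a different CPU.
	 */
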
>
> > +
> >  static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
> >  {
> >         if (folio_test_unevictable(folio))
> > @@ -250,23 +279,11 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
> >   */
> >  void folio_rotate_reclaimable(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -       unsigned long flags;
> > -
> >         if (folio_test_locked(folio) || folio_test_dirty(folio) ||
> >             folio_test_unevictable(folio))
> >                 return;
> >
> > -       folio_get(folio);
> > -       if (!folio_test_clear_lru(folio)) {
> > -               folio_put(folio);
> > -               return;
> > -       }
> > -
> > -       local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_move_tail);
> > -       folio_batch_add_and_move(fbatch, folio, lru_move_tail);
> > -       local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> > +       folio_batch_add_and_move(folio, lru_move_tail, true);
> >  }
> >
> >  void lru_note_cost(struct lruvec *lruvec, bool file,
> > @@ -355,21 +372,10 @@ static void folio_activate_drain(int cpu)
> >
> >  void folio_activate(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -
> >         if (folio_test_active(folio) || folio_test_unevictable(folio))
> >                 return;
> >
> > -       folio_get(folio);
> > -       if (!folio_test_clear_lru(folio)) {
> > -               folio_put(folio);
> > -               return;
> > -       }
> > -
> > -       local_lock(&cpu_fbatches.lock);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_activate);
> > -       folio_batch_add_and_move(fbatch, folio, lru_activate);
> > -       local_unlock(&cpu_fbatches.lock);
> > +       folio_batch_add_and_move(folio, lru_activate, true);
> >  }
> >
> >  #else
> > @@ -513,8 +519,6 @@ EXPORT_SYMBOL(folio_mark_accessed);
> >   */
> >  void folio_add_lru(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -
> >         VM_BUG_ON_FOLIO(folio_test_active(folio) &&
> >                         folio_test_unevictable(folio), folio);
> >         VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> > @@ -524,11 +528,7 @@ void folio_add_lru(struct folio *folio)
> >             lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
> >                 folio_set_active(folio);
> >
> > -       folio_get(folio);
> > -       local_lock(&cpu_fbatches.lock);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
> > -       folio_batch_add_and_move(fbatch, folio, lru_add);
> > -       local_unlock(&cpu_fbatches.lock);
> > +       folio_batch_add_and_move(folio, lru_add, false);
> >  }
> >  EXPORT_SYMBOL(folio_add_lru);
> >
> > @@ -702,22 +702,11 @@ void lru_add_drain_cpu(int cpu)
> >   */
> >  void deactivate_file_folio(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -
> >         /* Deactivating an unevictable folio will not accelerate reclaim */
> >         if (folio_test_unevictable(folio))
> >                 return;
> >
> > -       folio_get(folio);
> > -       if (!folio_test_clear_lru(folio)) {
> > -               folio_put(folio);
> > -               return;
> > -       }
> > -
> > -       local_lock(&cpu_fbatches.lock);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
> > -       folio_batch_add_and_move(fbatch, folio, lru_deactivate_file);
> > -       local_unlock(&cpu_fbatches.lock);
> > +       folio_batch_add_and_move(folio, lru_deactivate_file, true);
> >  }
> >
> >  /*
> > @@ -730,21 +719,10 @@ void deactivate_file_folio(struct folio *folio)
> >   */
> >  void folio_deactivate(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -
> >         if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled()))
> >                 return;
> >
> > -       folio_get(folio);
> > -       if (!folio_test_clear_lru(folio)) {
> > -               folio_put(folio);
> > -               return;
> > -       }
> > -
> > -       local_lock(&cpu_fbatches.lock);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
> > -       folio_batch_add_and_move(fbatch, folio, lru_deactivate);
> > -       local_unlock(&cpu_fbatches.lock);
> > +       folio_batch_add_and_move(folio, lru_deactivate, true);
> >  }
> >
> >  /**
> > @@ -756,22 +734,11 @@ void folio_deactivate(struct folio *folio)
> >   */
> >  void folio_mark_lazyfree(struct folio *folio)
> >  {
> > -       struct folio_batch *fbatch;
> > -
> >         if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
> >             folio_test_swapcache(folio) || folio_test_unevictable(folio))
> >                 return;
> >
> > -       folio_get(folio);
> > -       if (!folio_test_clear_lru(folio)) {
> > -               folio_put(folio);
> > -               return;
> > -       }
> > -
> > -       local_lock(&cpu_fbatches.lock);
> > -       fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
> > -       folio_batch_add_and_move(fbatch, folio, lru_lazyfree);
> > -       local_unlock(&cpu_fbatches.lock);
> > +       folio_batch_add_and_move(folio, lru_lazyfree, true);
> >  }
> >
> >  void lru_add_drain(void)
> > --
> > 2.45.2.803.g4e1b14247a-goog
> >
> >
>
> Thanks
> Barry
Yu Zhao July 26, 2024, 6:50 a.m. UTC | #3
On Fri, Jul 26, 2024 at 05:56:10PM +1200, Barry Song wrote:
> On Fri, Jul 26, 2024 at 5:48 PM Barry Song <21cnbao@gmail.com> wrote:
> >
> > On Thu, Jul 11, 2024 at 2:15 PM Yu Zhao <yuzhao@google.com> wrote:
> > >
> > > Remove boilerplate by using a macro to choose the corresponding lock
> > > and handler for each folio_batch in cpu_fbatches.
> > >
> > > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > > ---
> > >  mm/swap.c | 107 +++++++++++++++++++-----------------------------------
> > >  1 file changed, 37 insertions(+), 70 deletions(-)
> > >
> > > diff --git a/mm/swap.c b/mm/swap.c
> > > index 4a66d2f87f26..342ff4e39ba4 100644
> > > --- a/mm/swap.c
> > > +++ b/mm/swap.c
> > > @@ -220,16 +220,45 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
> > >         folios_put(fbatch);
> > >  }
> > >
> > > -static void folio_batch_add_and_move(struct folio_batch *fbatch,
> > > -               struct folio *folio, move_fn_t move_fn)
> > > +static void __folio_batch_add_and_move(struct folio_batch *fbatch,
> > > +               struct folio *folio, move_fn_t move_fn,
> > > +               bool on_lru, bool disable_irq)
> > >  {
> > > +       unsigned long flags;
> > > +
> > > +       folio_get(folio);
> > > +
> > > +       if (on_lru && !folio_test_clear_lru(folio)) {
> > > +               folio_put(folio);
> > > +               return;
> > > +       }
> > > +
> > >         if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
> > >             !lru_cache_disabled())
> > >                 return;
> > >
> > > +       if (disable_irq)
> > > +               local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> > > +       else
> > > +               local_lock(&cpu_fbatches.lock);
> > > +
> > >         folio_batch_move_lru(fbatch, move_fn);
> > > +
> > > +       if (disable_irq)
> > > +               local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> > > +       else
> > > +               local_unlock(&cpu_fbatches.lock);
> > >  }
> > >
> > > +#define folio_batch_add_and_move(folio, op, on_lru)                                            \
> > > +       __folio_batch_add_and_move(                                                             \
> > > +               this_cpu_ptr(&cpu_fbatches.op),                                                 \
> > > +               folio,                                                                          \
> > > +               op,                                                                             \
> > > +               on_lru,                                                                         \
> > > +               offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)     \
> > > +       )
> >
> > I am running into this BUG, is it relevant?

Sorry for the trouble.

> > / # [   64.908801] check_preemption_disabled: 1804 callbacks suppressed
> > [   64.908915] BUG: using smp_processor_id() in preemptible [00000000] code: jbd2/vda-8/96
> > [   64.909912] caller is debug_smp_processor_id+0x20/0x30
> > [   64.911743] CPU: 0 UID: 0 PID: 96 Comm: jbd2/vda-8 Not tainted 6.10.0-gef32eccacce2 #59
> > [   64.912373] Hardware name: linux,dummy-virt (DT)
> > [   64.912741] Call trace:
> > [   64.913048]  dump_backtrace+0x9c/0x100
> > [   64.913414]  show_stack+0x20/0x38
> > [   64.913761]  dump_stack_lvl+0xc4/0x150
> > [   64.914197]  dump_stack+0x18/0x28
> > [   64.914557]  check_preemption_disabled+0xd8/0x120
> > [   64.914944]  debug_smp_processor_id+0x20/0x30
> > [   64.915321]  folio_add_lru+0x30/0xa8
> > [   64.915680]  filemap_add_folio+0xe4/0x118
> > [   64.916082]  __filemap_get_folio+0x178/0x450
> > [   64.916455]  __getblk_slow+0xb0/0x310
> > [   64.916816]  bdev_getblk+0x94/0xc0
> > [   64.917169]  jbd2_journal_get_descriptor_buffer+0x6c/0x1b0
> > [   64.917590]  jbd2_journal_commit_transaction+0x7f0/0x1c88
> > [   64.917994]  kjournald2+0xd4/0x278
> > [   64.918344]  kthread+0x11c/0x128
> > [   64.918693]  ret_from_fork+0x10/0x20
> 
> This removes the BUG complaint, but I'm unsure if it's the correct fix:

Below is the proper fix. Will post v2.

--- a/mm/swap.c
+++ b/mm/swap.c
@@ -221,7 +221,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 	folios_put(fbatch);
 }
 
-static void __folio_batch_add_and_move(struct folio_batch *fbatch,
+static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
 		struct folio *folio, move_fn_t move_fn,
 		bool on_lru, bool disable_irq)
 {
@@ -234,16 +234,14 @@ static void __folio_batch_add_and_move(struct folio_batch *fbatch,
 		return;
 	}
 
-	if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
-	    !lru_cache_disabled())
-		return;
-
 	if (disable_irq)
 		local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
 	else
 		local_lock(&cpu_fbatches.lock);
 
-	folio_batch_move_lru(fbatch, move_fn);
+	if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) ||
+	    lru_cache_disabled())
+		folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
 
 	if (disable_irq)
 		local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
@@ -253,7 +251,7 @@ static void __folio_batch_add_and_move(struct folio_batch *fbatch,
 
 #define folio_batch_add_and_move(folio, op, on_lru)						\
 	__folio_batch_add_and_move(								\
-		this_cpu_ptr(&cpu_fbatches.op),							\
+		&cpu_fbatches.op,								\
 		folio,										\
 		op,										\
 		on_lru,										\
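
The point of the change, as a sketch of the before/after ordering (not an
exact quote of either version):

	/* v1: the per-CPU batch is resolved and filled while still preemptible */
	fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);	/* resolved on CPU X */
	folio_batch_add(fbatch, folio);			/* task may have migrated to CPU Y */
	local_lock(&cpu_fbatches.lock);			/* pins us to CPU Y ... */
	folio_batch_move_lru(fbatch, move_fn);		/* ... but drains CPU X's batch */

	/* v2: take the local lock first, then resolve the pointer, so the add
	 * and the drain always act on the batch of the CPU we are pinned to */
	local_lock(&cpu_fbatches.lock);
	if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || ...)
		folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn);
	local_unlock(&cpu_fbatches.lock);
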
Hugh Dickins Aug. 4, 2024, 6:55 a.m. UTC | #4
On Wed, 10 Jul 2024, Yu Zhao wrote:

> Remove boilerplate by using a macro to choose the corresponding lock
> and handler for each folio_batch in cpu_fbatches.
> 
> Signed-off-by: Yu Zhao <yuzhao@google.com>

Andrew, please revert this "remove boilerplate" patch (and of course its
followup fix) from mm-unstable. From the title I presume it was intended
to make no functional change, but that's far from so.

Under tmpfs swapping load, on different runs I see various badnesses:
"Bad page" in __free_one_page(), Oops in __run_timer_base(),
WARNING at kernel/workqueue.c:790 in set_work_data(), PageBuddy BUG
at page-flags.h:1009 from __del_page_from_freelist(), something (I'd
given up taking better notes by this time) in __queue_work(), others.

All those were including the fix to Barry's report: without that fix,
the boot is drowned in warnings scrolling past too fast to be read.

(All the above were on the HP workstation, swapping to SSD; whereas on
this ThinkPad, swapping to NVMe, no problem seen at all - I mention the
swapping medium, but have no idea whether that's a relevant difference.
In each case, MGLRU compiled in but not enabled. THPs and 64kTHPs active.)

Sorry, but I've put no effort whatsoever into debugging this: "remove
boilerplate" didn't seem worth the effort, and my personal preference
is for readable boilerplate over hiding in a macro.  If you prefer the
macro, I expect Yu can soon come up with a fix (which I could test here):
but for now please revert "remove boilerplate", since its issues get in
the way of further mm testing.

Thanks,
Hugh

> ---
>  mm/swap.c | 107 +++++++++++++++++++-----------------------------------
>  1 file changed, 37 insertions(+), 70 deletions(-)
> 
> diff --git a/mm/swap.c b/mm/swap.c
> index 4a66d2f87f26..342ff4e39ba4 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -220,16 +220,45 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
>  	folios_put(fbatch);
>  }
>  
> -static void folio_batch_add_and_move(struct folio_batch *fbatch,
> -		struct folio *folio, move_fn_t move_fn)
> +static void __folio_batch_add_and_move(struct folio_batch *fbatch,
> +		struct folio *folio, move_fn_t move_fn,
> +		bool on_lru, bool disable_irq)
>  {
> +	unsigned long flags;
> +
> +	folio_get(folio);
> +
> +	if (on_lru && !folio_test_clear_lru(folio)) {
> +		folio_put(folio);
> +		return;
> +	}
> +
>  	if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
>  	    !lru_cache_disabled())
>  		return;
>  
> +	if (disable_irq)
> +		local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> +	else
> +		local_lock(&cpu_fbatches.lock);
> +
>  	folio_batch_move_lru(fbatch, move_fn);
> +
> +	if (disable_irq)
> +		local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> +	else
> +		local_unlock(&cpu_fbatches.lock);
>  }
>  
> +#define folio_batch_add_and_move(folio, op, on_lru)						\
> +	__folio_batch_add_and_move(								\
> +		this_cpu_ptr(&cpu_fbatches.op),							\
> +		folio,										\
> +		op,										\
> +		on_lru,										\
> +		offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)	\
> +	)
> +
>  static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
>  {
>  	if (folio_test_unevictable(folio))
> @@ -250,23 +279,11 @@ static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
>   */
>  void folio_rotate_reclaimable(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -	unsigned long flags;
> -
>  	if (folio_test_locked(folio) || folio_test_dirty(folio) ||
>  	    folio_test_unevictable(folio))
>  		return;
>  
> -	folio_get(folio);
> -	if (!folio_test_clear_lru(folio)) {
> -		folio_put(folio);
> -		return;
> -	}
> -
> -	local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_move_tail);
> -	folio_batch_add_and_move(fbatch, folio, lru_move_tail);
> -	local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
> +	folio_batch_add_and_move(folio, lru_move_tail, true);
>  }
>  
>  void lru_note_cost(struct lruvec *lruvec, bool file,
> @@ -355,21 +372,10 @@ static void folio_activate_drain(int cpu)
>  
>  void folio_activate(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -
>  	if (folio_test_active(folio) || folio_test_unevictable(folio))
>  		return;
>  
> -	folio_get(folio);
> -	if (!folio_test_clear_lru(folio)) {
> -		folio_put(folio);
> -		return;
> -	}
> -
> -	local_lock(&cpu_fbatches.lock);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_activate);
> -	folio_batch_add_and_move(fbatch, folio, lru_activate);
> -	local_unlock(&cpu_fbatches.lock);
> +	folio_batch_add_and_move(folio, lru_activate, true);
>  }
>  
>  #else
> @@ -513,8 +519,6 @@ EXPORT_SYMBOL(folio_mark_accessed);
>   */
>  void folio_add_lru(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -
>  	VM_BUG_ON_FOLIO(folio_test_active(folio) &&
>  			folio_test_unevictable(folio), folio);
>  	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> @@ -524,11 +528,7 @@ void folio_add_lru(struct folio *folio)
>  	    lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
>  		folio_set_active(folio);
>  
> -	folio_get(folio);
> -	local_lock(&cpu_fbatches.lock);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
> -	folio_batch_add_and_move(fbatch, folio, lru_add);
> -	local_unlock(&cpu_fbatches.lock);
> +	folio_batch_add_and_move(folio, lru_add, false);
>  }
>  EXPORT_SYMBOL(folio_add_lru);
>  
> @@ -702,22 +702,11 @@ void lru_add_drain_cpu(int cpu)
>   */
>  void deactivate_file_folio(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -
>  	/* Deactivating an unevictable folio will not accelerate reclaim */
>  	if (folio_test_unevictable(folio))
>  		return;
>  
> -	folio_get(folio);
> -	if (!folio_test_clear_lru(folio)) {
> -		folio_put(folio);
> -		return;
> -	}
> -
> -	local_lock(&cpu_fbatches.lock);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
> -	folio_batch_add_and_move(fbatch, folio, lru_deactivate_file);
> -	local_unlock(&cpu_fbatches.lock);
> +	folio_batch_add_and_move(folio, lru_deactivate_file, true);
>  }
>  
>  /*
> @@ -730,21 +719,10 @@ void deactivate_file_folio(struct folio *folio)
>   */
>  void folio_deactivate(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -
>  	if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled()))
>  		return;
>  
> -	folio_get(folio);
> -	if (!folio_test_clear_lru(folio)) {
> -		folio_put(folio);
> -		return;
> -	}
> -
> -	local_lock(&cpu_fbatches.lock);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
> -	folio_batch_add_and_move(fbatch, folio, lru_deactivate);
> -	local_unlock(&cpu_fbatches.lock);
> +	folio_batch_add_and_move(folio, lru_deactivate, true);
>  }
>  
>  /**
> @@ -756,22 +734,11 @@ void folio_deactivate(struct folio *folio)
>   */
>  void folio_mark_lazyfree(struct folio *folio)
>  {
> -	struct folio_batch *fbatch;
> -
>  	if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
>  	    folio_test_swapcache(folio) || folio_test_unevictable(folio))
>  		return;
>  
> -	folio_get(folio);
> -	if (!folio_test_clear_lru(folio)) {
> -		folio_put(folio);
> -		return;
> -	}
> -
> -	local_lock(&cpu_fbatches.lock);
> -	fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
> -	folio_batch_add_and_move(fbatch, folio, lru_lazyfree);
> -	local_unlock(&cpu_fbatches.lock);
> +	folio_batch_add_and_move(folio, lru_lazyfree, true);
>  }
>  
>  void lru_add_drain(void)
> -- 
> 2.45.2.803.g4e1b14247a-goog
Yu Zhao Aug. 4, 2024, 9:36 p.m. UTC | #5
On Sat, Aug 03, 2024 at 11:55:51PM -0700, Hugh Dickins wrote:
> On Wed, 10 Jul 2024, Yu Zhao wrote:
> 
> > Remove boilerplate by using a macro to choose the corresponding lock
> > and handler for each folio_batch in cpu_fbatches.
> > 
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> 
> Andrew, please revert this "remove boilerplate" patch (and of course its
> followup fix) from mm-unstable. From the title I presume it was intended
> to make no functional change, but that's far from so.
> 
> Under tmpfs swapping load, on different runs I see various badnesses:
> "Bad page" in __free_one_page(), Oops in __run_timer_base(),
> WARNING at kernel/workqueue.c:790 in set_work_data(), PageBuddy BUG
> at page-flags.h:1009 from __del_page_from_freelist(), something (I'd
> given up taking better notes by this time) in __queue_work(), others.
> 
> All those were including the fix to Barry's report: without that fix,
> the boot is drowned in warnings scrolling past too fast to be read.
> 
> (All the above were on the HP workstation, swapping to SSD; whereas on
> this ThinkPad, swapping to NVMe, no problem seen at all - I mention the
> swapping medium, but have no idea whether that's a relevant difference.
> In each case, MGLRU compiled in but not enabled. THPs and 64kTHPs active.)
> 
> Sorry, but I've put no effort whatsoever into debugging this: "remove
> boilerplate" didn't seem worth the effort, and my personal preference
> is for readable boilerplate over hiding in a macro.  If you prefer the
> macro, I expect Yu can soon come up with a fix (which I could test here):
> but for now please revert "remove boilerplate", since its issues get in
> the way of further mm testing.

Sorry for getting in your way, Hugh.

Apparently I didn't expect local_lock_t to be zero length, i.e., when
CONFIG_DEBUG_LOCK_ALLOC is not set. That might explain why only one of
your two machines had problems: on it, the macro failed to disable IRQs
when rotating clean pages after writeback.

The following should fix it, in case you want to verify the above:

diff --git a/mm/swap.c b/mm/swap.c
index 4bc08352ad87..67a246772811 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -254,7 +254,7 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
 		folio,										\
 		op,										\
 		on_lru,										\
-		offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)	\
+		offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq)	\
 	)
 
 static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
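
To spell out the layout issue (simplified sketch of struct cpu_fbatches as
laid out in this series; the full list of batches is omitted):

	struct cpu_fbatches {
		local_lock_t lock;		/* empty struct when lockdep is off */
		struct folio_batch lru_add;
		/* ... the other batches protected by the plain local lock ... */
		local_lock_t lock_irq;		/* also zero-sized when lockdep is off */
		struct folio_batch lru_move_tail;
	};

With CONFIG_DEBUG_LOCK_ALLOC=n, local_lock_t has no members, so
offsetof(struct cpu_fbatches, lru_move_tail) equals
offsetof(struct cpu_fbatches, lock_irq). The strict '>' therefore made
disable_irq false for lru_move_tail, i.e. rotation ran without disabling
IRQs; '>=' is true exactly for the batches declared at or after lock_irq.
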
Hugh Dickins Aug. 5, 2024, 7:14 p.m. UTC | #6
On Sun, 4 Aug 2024, Yu Zhao wrote:
> On Sat, Aug 03, 2024 at 11:55:51PM -0700, Hugh Dickins wrote:
> > On Wed, 10 Jul 2024, Yu Zhao wrote:
> > 
> > > Remove boilerplate by using a macro to choose the corresponding lock
> > > and handler for each folio_batch in cpu_fbatches.
> > > 
> > > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > 
> > Andrew, please revert this "remove boilerplate" patch (and of course its
> > followup fix) from mm-unstable. From the title I presume it was intended
> > to make no functional change, but that's far from so.
> > 
> > Under tmpfs swapping load, on different runs I see various badnesses:
> > "Bad page" in __free_one_page(), Oops in __run_timer_base(),
> > WARNING at kernel/workqueue.c:790 in set_work_data(), PageBuddy BUG
> > at page-flags.h:1009 from __del_page_from_freelist(), something (I'd
> > given up taking better notes by this time) in __queue_work(), others.
> > 
> > All those were including the fix to Barry's report: without that fix,
> > the boot is drowned in warnings scrolling past too fast to be read.
> > 
> > (All the above were on the HP workstation, swapping to SSD; whereas on
> > this ThinkPad, swapping to NVMe, no problem seen at all - I mention the
> > swapping medium, but have no idea whether that's a relevant difference.
> > In each case, MGLRU compiled in but not enabled. THPs and 64kTHPs active.)
> > 
> > Sorry, but I've put no effort whatsoever into debugging this: "remove
> > boilerplate" didn't seem worth the effort, and my personal preference
> > is for readable boilerplate over hiding in a macro.  If you prefer the
> > macro, I expect Yu can soon come up with a fix (which I could test here):
> > but for now please revert "remove boilerplate", since its issues get in
> > the way of further mm testing.
> 
> Sorry for getting in your way, Hugh.
> 
> Apparently I didn't expect local_lock_t to be zero length, i.e., when
> CONFIG_DEBUG_LOCK_ALLOC is not set. That might explain why only one of
> your two machines had problems: on it, the macro failed to disable IRQs
> when rotating clean pages after writeback.
> 
> The following should fix it, in case you want to verify the above:
> 
> diff --git a/mm/swap.c b/mm/swap.c
> index 4bc08352ad87..67a246772811 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -254,7 +254,7 @@ static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch,
>  		folio,										\
>  		op,										\
>  		on_lru,										\
> -		offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)	\
> +		offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq)	\
>  	)
>  
>  static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)

Well caught! Yes, I confirm that fixes all the bad behaviour I was seeing
(and fits with my having DEBUG_LOCK_ALLOC and lockdep enabled on the
untroubled machine, but not on the one showing problems) - thanks.

But it does reinforce my opinion that mm/swap.c is more understandable
without that macro than with it.

Hugh

Patch

diff --git a/mm/swap.c b/mm/swap.c
index 4a66d2f87f26..342ff4e39ba4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -220,16 +220,45 @@  static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
 	folios_put(fbatch);
 }
 
-static void folio_batch_add_and_move(struct folio_batch *fbatch,
-		struct folio *folio, move_fn_t move_fn)
+static void __folio_batch_add_and_move(struct folio_batch *fbatch,
+		struct folio *folio, move_fn_t move_fn,
+		bool on_lru, bool disable_irq)
 {
+	unsigned long flags;
+
+	folio_get(folio);
+
+	if (on_lru && !folio_test_clear_lru(folio)) {
+		folio_put(folio);
+		return;
+	}
+
 	if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
 	    !lru_cache_disabled())
 		return;
 
+	if (disable_irq)
+		local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
+	else
+		local_lock(&cpu_fbatches.lock);
+
 	folio_batch_move_lru(fbatch, move_fn);
+
+	if (disable_irq)
+		local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+	else
+		local_unlock(&cpu_fbatches.lock);
 }
 
+#define folio_batch_add_and_move(folio, op, on_lru)						\
+	__folio_batch_add_and_move(								\
+		this_cpu_ptr(&cpu_fbatches.op),							\
+		folio,										\
+		op,										\
+		on_lru,										\
+		offsetof(struct cpu_fbatches, op) > offsetof(struct cpu_fbatches, lock_irq)	\
+	)
+
 static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
 {
 	if (folio_test_unevictable(folio))
@@ -250,23 +279,11 @@  static void lru_move_tail(struct lruvec *lruvec, struct folio *folio)
  */
 void folio_rotate_reclaimable(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-	unsigned long flags;
-
 	if (folio_test_locked(folio) || folio_test_dirty(folio) ||
 	    folio_test_unevictable(folio))
 		return;
 
-	folio_get(folio);
-	if (!folio_test_clear_lru(folio)) {
-		folio_put(folio);
-		return;
-	}
-
-	local_lock_irqsave(&cpu_fbatches.lock_irq, flags);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_move_tail);
-	folio_batch_add_and_move(fbatch, folio, lru_move_tail);
-	local_unlock_irqrestore(&cpu_fbatches.lock_irq, flags);
+	folio_batch_add_and_move(folio, lru_move_tail, true);
 }
 
 void lru_note_cost(struct lruvec *lruvec, bool file,
@@ -355,21 +372,10 @@  static void folio_activate_drain(int cpu)
 
 void folio_activate(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-
 	if (folio_test_active(folio) || folio_test_unevictable(folio))
 		return;
 
-	folio_get(folio);
-	if (!folio_test_clear_lru(folio)) {
-		folio_put(folio);
-		return;
-	}
-
-	local_lock(&cpu_fbatches.lock);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_activate);
-	folio_batch_add_and_move(fbatch, folio, lru_activate);
-	local_unlock(&cpu_fbatches.lock);
+	folio_batch_add_and_move(folio, lru_activate, true);
 }
 
 #else
@@ -513,8 +519,6 @@  EXPORT_SYMBOL(folio_mark_accessed);
  */
 void folio_add_lru(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-
 	VM_BUG_ON_FOLIO(folio_test_active(folio) &&
 			folio_test_unevictable(folio), folio);
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
@@ -524,11 +528,7 @@  void folio_add_lru(struct folio *folio)
 	    lru_gen_in_fault() && !(current->flags & PF_MEMALLOC))
 		folio_set_active(folio);
 
-	folio_get(folio);
-	local_lock(&cpu_fbatches.lock);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_add);
-	folio_batch_add_and_move(fbatch, folio, lru_add);
-	local_unlock(&cpu_fbatches.lock);
+	folio_batch_add_and_move(folio, lru_add, false);
 }
 EXPORT_SYMBOL(folio_add_lru);
 
@@ -702,22 +702,11 @@  void lru_add_drain_cpu(int cpu)
  */
 void deactivate_file_folio(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-
 	/* Deactivating an unevictable folio will not accelerate reclaim */
 	if (folio_test_unevictable(folio))
 		return;
 
-	folio_get(folio);
-	if (!folio_test_clear_lru(folio)) {
-		folio_put(folio);
-		return;
-	}
-
-	local_lock(&cpu_fbatches.lock);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
-	folio_batch_add_and_move(fbatch, folio, lru_deactivate_file);
-	local_unlock(&cpu_fbatches.lock);
+	folio_batch_add_and_move(folio, lru_deactivate_file, true);
 }
 
 /*
@@ -730,21 +719,10 @@  void deactivate_file_folio(struct folio *folio)
  */
 void folio_deactivate(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-
 	if (folio_test_unevictable(folio) || !(folio_test_active(folio) || lru_gen_enabled()))
 		return;
 
-	folio_get(folio);
-	if (!folio_test_clear_lru(folio)) {
-		folio_put(folio);
-		return;
-	}
-
-	local_lock(&cpu_fbatches.lock);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
-	folio_batch_add_and_move(fbatch, folio, lru_deactivate);
-	local_unlock(&cpu_fbatches.lock);
+	folio_batch_add_and_move(folio, lru_deactivate, true);
 }
 
 /**
@@ -756,22 +734,11 @@  void folio_deactivate(struct folio *folio)
  */
 void folio_mark_lazyfree(struct folio *folio)
 {
-	struct folio_batch *fbatch;
-
 	if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) ||
 	    folio_test_swapcache(folio) || folio_test_unevictable(folio))
 		return;
 
-	folio_get(folio);
-	if (!folio_test_clear_lru(folio)) {
-		folio_put(folio);
-		return;
-	}
-
-	local_lock(&cpu_fbatches.lock);
-	fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
-	folio_batch_add_and_move(fbatch, folio, lru_lazyfree);
-	local_unlock(&cpu_fbatches.lock);
+	folio_batch_add_and_move(folio, lru_lazyfree, true);
 }
 
 void lru_add_drain(void)