diff mbox series

[04/24] mm/swap: avoid setting page lock bit and doing extra unlock check

Message ID 20231119194740.94101-5-ryncsn@gmail.com (mailing list archive)
State New
Headers show
Series Swapin path refactor for optimization and bugfix | expand

Commit Message

Kairui Song Nov. 19, 2023, 7:47 p.m. UTC
From: Kairui Song <kasong@tencent.com>

When swapping in a page, mem_cgroup_swapin_charge_folio is called for the
newly allocated folio; nothing else is referencing the folio, so there is
no need to set the lock bit. This avoids doing an unlock check on the error path.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/swap_state.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

Comments

Chris Li Nov. 20, 2023, 4:17 a.m. UTC | #1
On Sun, Nov 19, 2023 at 11:48 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> When swapping in a page, mem_cgroup_swapin_charge_folio is called for new
> allocated folio, nothing else is referencing the folio so no need to set
> the lock bit. This avoided doing unlock check on error path.
>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/swap_state.c | 20 +++++++++-----------
>  1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index ac4fa404eaa7..45dd8b7c195d 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -458,6 +458,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,

You move the mem_cgroup_swapin_charge_folio() inside the for loop:


        for (;;) {
                int err;
                /*
                 * First check the swap cache.  Since this is normally
                 * called after swap_cache_get_folio() failed, re-calling
                 * that would confuse statistics.
                 */
                folio = filemap_get_folio(swap_address_space(entry),
                                                swp_offset(entry));


>                                                 mpol, ilx, numa_node_id());
>                 if (!folio)
>                          goto fail_put_swap;
> +               if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> +                       goto fail_put_folio;

Wouldn't it cause repeat charging of the folio when it is racing
against others in the for loop?

>
>                 /*
>                  * Swap entry may have been freed since our caller observed it.
> @@ -483,13 +485,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>         /*
>          * The swap entry is ours to swap in. Prepare the new page.
>          */
> -
>         __folio_set_locked(folio);
>         __folio_set_swapbacked(folio);
>
> -       if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> -               goto fail_unlock;
> -

The original code makes the charge outside of the for loop. Only the
winner can charge once.

Chris
Kairui Song Nov. 20, 2023, 11:15 a.m. UTC | #2
Chris Li <chrisl@kernel.org> 于2023年11月20日周一 12:18写道:
>
> On Sun, Nov 19, 2023 at 11:48 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > When swapping in a page, mem_cgroup_swapin_charge_folio is called for new
> > allocated folio, nothing else is referencing the folio so no need to set
> > the lock bit. This avoided doing unlock check on error path.
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > ---
> >  mm/swap_state.c | 20 +++++++++-----------
> >  1 file changed, 9 insertions(+), 11 deletions(-)
> >
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index ac4fa404eaa7..45dd8b7c195d 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -458,6 +458,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>
> You move the mem_cgroup_swapin_charge_folio() inside the for loop:
>
>
>         for (;;) {
>                 int err;
>                 /*
>                  * First check the swap cache.  Since this is normally
>                  * called after swap_cache_get_folio() failed, re-calling
>                  * that would confuse statistics.
>                  */
>                 folio = filemap_get_folio(swap_address_space(entry),
>                                                 swp_offset(entry));
>
>
> >                                                 mpol, ilx, numa_node_id());
> >                 if (!folio)
> >                          goto fail_put_swap;
> > +               if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > +                       goto fail_put_folio;
>
> Wouldn't it cause repeat charging of the folio when it is racing
> against others in the for loop?

The race loser will call folio_put and discharge it?

>
> >
> >                 /*
> >                  * Swap entry may have been freed since our caller observed it.
> > @@ -483,13 +485,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> >         /*
> >          * The swap entry is ours to swap in. Prepare the new page.
> >          */
> > -
> >         __folio_set_locked(folio);
> >         __folio_set_swapbacked(folio);
> >
> > -       if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > -               goto fail_unlock;
> > -
>
> The original code makes the charge outside of the for loop. Only the
> winner can charge once.

Right, this patch may make the charge/dis-charge path more complex for
race swapin, I'll re-check this part.

>
> Chris
Chris Li Nov. 20, 2023, 5:44 p.m. UTC | #3
On Mon, Nov 20, 2023 at 3:15 AM Kairui Song <ryncsn@gmail.com> wrote:
> > > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > > index ac4fa404eaa7..45dd8b7c195d 100644
> > > --- a/mm/swap_state.c
> > > +++ b/mm/swap_state.c
> > > @@ -458,6 +458,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> >
> > You move the mem_cgroup_swapin_charge_folio() inside the for loop:
> >
> >
> >         for (;;) {
> >                 int err;
> >                 /*
> >                  * First check the swap cache.  Since this is normally
> >                  * called after swap_cache_get_folio() failed, re-calling
> >                  * that would confuse statistics.
> >                  */
> >                 folio = filemap_get_folio(swap_address_space(entry),
> >                                                 swp_offset(entry));
> >
> >
> > >                                                 mpol, ilx, numa_node_id());
> > >                 if (!folio)
> > >                          goto fail_put_swap;
> > > +               if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > > +                       goto fail_put_folio;
> >
> > Wouldn't it cause repeat charging of the folio when it is racing
> > against others in the for loop?
>
> The race loser will call folio_put and discharge it?

There are two different charges: memcg charging and memcg swapin charging.
The folio_put will do the memcg uncharge; the corresponding memcg
charge happens at folio allocation.
Memcg swapin charge does things differently: it needs to modify the
swap-related accounting.
The memcg uncharge is not the pair of the memcg swapin charge.

> > >                 /*
> > >                  * Swap entry may have been freed since our caller observed it.
> > > @@ -483,13 +485,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> > >         /*
> > >          * The swap entry is ours to swap in. Prepare the new page.
> > >          */
> > > -
> > >         __folio_set_locked(folio);
> > >         __folio_set_swapbacked(folio);
> > >
> > > -       if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > > -               goto fail_unlock;
> > > -
> >
> > The original code makes the charge outside of the for loop. Only the
> > winner can charge once.
>
> Right, this patch may make the charge/dis-charge path more complex for
> race swapin, I'll re-check this part.

It is more than just complex, it seems to change the behavior of this code.

Chris
Kairui Song Nov. 22, 2023, 5:32 p.m. UTC | #4
Chris Li <chrisl@kernel.org> 于2023年11月21日周二 01:44写道:
>
> On Mon, Nov 20, 2023 at 3:15 AM Kairui Song <ryncsn@gmail.com> wrote:
> > > > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > > > index ac4fa404eaa7..45dd8b7c195d 100644
> > > > --- a/mm/swap_state.c
> > > > +++ b/mm/swap_state.c
> > > > @@ -458,6 +458,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> > >
> > > You move the mem_cgroup_swapin_charge_folio() inside the for loop:
> > >
> > >
> > >         for (;;) {
> > >                 int err;
> > >                 /*
> > >                  * First check the swap cache.  Since this is normally
> > >                  * called after swap_cache_get_folio() failed, re-calling
> > >                  * that would confuse statistics.
> > >                  */
> > >                 folio = filemap_get_folio(swap_address_space(entry),
> > >                                                 swp_offset(entry));
> > >
> > >
> > > >                                                 mpol, ilx, numa_node_id());
> > > >                 if (!folio)
> > > >                          goto fail_put_swap;
> > > > +               if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > > > +                       goto fail_put_folio;
> > >
> > > Wouldn't it cause repeat charging of the folio when it is racing
> > > against others in the for loop?
> >
> > The race loser will call folio_put and discharge it?
>
> There are two different charges. Memcg charging and memcg swapin charging.
> The folio_put will do the memcg discharge, the corresponding memcg
> charge is in follio allocation.

Hi Chris,

I didn't get your idea here... By "memcg swapin charge", do you mean
"memory.swap.*"? And "memcg charging" means "memory.*"? There is no
memcg charge related code in folio allocation (alloc_pages_mpol);
actually, the mem_cgroup_swapin_charge_folio here is doing a memcg charge,
not a memcg swapin charge. The swapin path actually needs to uncharge
"memory.swap" via mem_cgroup_swapin_uncharge_swap in a later part of this
function.


> Memcg swapin charge does things differently, it needs to modify the
> swap relately accounting.
> The memcg uncharge is not a pair for memcg swapin charge.
>
> > > >                 /*
> > > >                  * Swap entry may have been freed since our caller observed it.
> > > > @@ -483,13 +485,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
> > > >         /*
> > > >          * The swap entry is ours to swap in. Prepare the new page.
> > > >          */
> > > > -
> > > >         __folio_set_locked(folio);
> > > >         __folio_set_swapbacked(folio);
> > > >
> > > > -       if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
> > > > -               goto fail_unlock;
> > > > -
> > >
> > > The original code makes the charge outside of the for loop. Only the
> > > winner can charge once.
> >
> > Right, this patch may make the charge/dis-charge path more complex for
> > race swapin, I'll re-check this part.
>
> It is more than just complex, it seems to change the behavior of this code.
>
> Chris
Chris Li Nov. 22, 2023, 8:57 p.m. UTC | #5
Hi Kairui,

On Wed, Nov 22, 2023 at 9:33 AM Kairui Song <ryncsn@gmail.com> wrote:

> > There are two different charges. Memcg charging and memcg swapin charging.
> > The folio_put will do the memcg discharge, the corresponding memcg
> > charge is in follio allocation.
>
> Hi Chris,
>
> I didn't get your idea here... By "memcg swapin charge", do you mean
> "memory.swap.*"? And "memcg charging" means "memory.*"?. There is no

Sorry I should have used the function name then there is no ambiguity.
"memcg swapin charge" I mean function mem_cgroup_swapin_charge_folio().
This function will look up the swap entry and find the memcg by swap entry then
charge to that memcg.

> memcg charge related code in folio allocation (alloc_pages_mpol),
> actually the mem_cgroup_swapin_charge_folio here is doing memcg charge
> not memcg swapin charge. Swapin path actually need to uncharge
> "memory.swap" by mem_cgroup_swapin_uncharge_swap in later part of this
> function.

I still think you have a bug there.

Take this make up example:
Let say the for loop runs 3 times and the 3rd time breaks out the for loop.
The original code will call:
filemap_get_folio() 3 times
folio_put() 2 times
mem_cgroup_swapin_charge_folio() 1 time.

With your patch, it will call:
filemap_get_folio() 3 times
folio_put() 2 times
mem_cgroup_swapin_charge_folio() 3 times.

Do you see the behavior difference there?

Chris
Kairui Song Nov. 24, 2023, 8:14 a.m. UTC | #6
Chris Li <chrisl@kernel.org> 于2023年11月23日周四 04:57写道:
>
> Hi Kairui,
>
> On Wed, Nov 22, 2023 at 9:33 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> > > There are two different charges. Memcg charging and memcg swapin charging.
> > > The folio_put will do the memcg discharge, the corresponding memcg
> > > charge is in follio allocation.
> >
> > Hi Chris,
> >
> > I didn't get your idea here... By "memcg swapin charge", do you mean
> > "memory.swap.*"? And "memcg charging" means "memory.*"?. There is no
>
> Sorry I should have used the function name then there is no ambiguity.
> "memcg swapin charge" I mean function mem_cgroup_swapin_charge_folio().
> This function will look up the swap entry and find the memcg by swap entry then
> charge to that memcg.
>
> > memcg charge related code in folio allocation (alloc_pages_mpol),
> > actually the mem_cgroup_swapin_charge_folio here is doing memcg charge
> > not memcg swapin charge. Swapin path actually need to uncharge
> > "memory.swap" by mem_cgroup_swapin_uncharge_swap in later part of this
> > function.
>
> I still think you have a bug there.
>
> Take this make up example:
> Let say the for loop runs 3 times and the 3rd time breaks out the for loop.
> The original code will call:
> filemap_get_folio() 3 times
> folio_put() 2 times
> mem_cgroup_swapin_charge_folio() 1 time.
>
> With your patch, it will call:
> filemap_get_folio() 3 times
> folio_put() 2 times
> mem_cgroup_swapin_charge_folio() 3 times.
>
> Do you see the behavior difference there?

Hi Chris.

folio_put will uncharge a page if it's charged. In the original code, the
2 folio_put calls simply free the page since it's not charged. But in
this patch, folio_put will cancel the previous
mem_cgroup_swapin_charge_folio call, so the 3
mem_cgroup_swapin_charge_folio calls will actually only charge once (2 calls
were cancelled by folio_put).

I agree this is indeed confusing and causes more trouble on the
error path (the uncharge could be more expensive than the unlock check);
I will rework this part.
Chris Li Nov. 24, 2023, 8:37 a.m. UTC | #7
On Fri, Nov 24, 2023 at 12:15 AM Kairui Song <ryncsn@gmail.com> wrote:
>
>
> folio_put will discharge a page if it's charged, in original code the
> 2 folio_put call simply free the page since it's not charged. But in
> this patch, folio_put will cancel previous
> mem_cgroup_swapin_charge_folio call, so actually the 3
> mem_cgroup_swapin_charge_folio calls will only charge once. (2 calls
> was cancelled by folio_put).

You are saying that in the original code, folio_put() will free without
uncharging, but with your patch, folio_put() will free and cancel the
mem_cgroup_swapin_charge_folio() charge. Is that because
folio->memcg_data was not set in the first case and folio->memcg_data
was set in the second case?

>
> I think this is making it confusing indeed and causing more trouble in
> error path (the uncharge could be more expensive than unlock check),
> will rework this part.

Agree. Thanks.

Chris
diff mbox series

Patch

diff --git a/mm/swap_state.c b/mm/swap_state.c
index ac4fa404eaa7..45dd8b7c195d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -458,6 +458,8 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 						mpol, ilx, numa_node_id());
 		if (!folio)
                         goto fail_put_swap;
+		if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
+			goto fail_put_folio;
 
 		/*
 		 * Swap entry may have been freed since our caller observed it.
@@ -483,13 +485,9 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	/*
 	 * The swap entry is ours to swap in. Prepare the new page.
 	 */
-
 	__folio_set_locked(folio);
 	__folio_set_swapbacked(folio);
 
-	if (mem_cgroup_swapin_charge_folio(folio, NULL, gfp_mask, entry))
-		goto fail_unlock;
-
 	/* May fail (-ENOMEM) if XArray node allocation failed. */
 	if (add_to_swap_cache(folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
 		goto fail_unlock;
@@ -510,6 +508,7 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 fail_unlock:
 	put_swap_folio(folio, entry);
 	folio_unlock(folio);
+fail_put_folio:
 	folio_put(folio);
 fail_put_swap:
 	put_swap_device(si);
@@ -873,16 +872,15 @@  struct page *swapin_no_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
 				vma, vmf->address, false);
 	if (folio) {
-		__folio_set_locked(folio);
-		__folio_set_swapbacked(folio);
-
-		if (mem_cgroup_swapin_charge_folio(folio,
-					vma->vm_mm, GFP_KERNEL,
-					entry)) {
-			folio_unlock(folio);
+		if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
+						   GFP_KERNEL, entry)) {
 			folio_put(folio);
 			return NULL;
 		}
+
+		__folio_set_locked(folio);
+		__folio_set_swapbacked(folio);
+
 		mem_cgroup_swapin_uncharge_swap(entry);
 
 		shadow = get_shadow_from_swap_cache(entry);