diff mbox series

[5/5] mm/zswap: cleanup zswap_reclaim_entry()

Message ID 20231213-zswap-dstmem-v1-5-896763369d04@bytedance.com (mailing list archive)
State New
Headers show
Series mm/zswap: dstmem reuse optimizations and cleanups | expand

Commit Message

Chengming Zhou Dec. 13, 2023, 4:18 a.m. UTC
Also after the common decompress part goes to __zswap_load(), we can
cleanup the zswap_reclaim_entry() a little.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
---
 mm/zswap.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

Comments

Nhat Pham Dec. 13, 2023, 11:27 p.m. UTC | #1
On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> Also after the common decompress part goes to __zswap_load(), we can
> cleanup the zswap_reclaim_entry() a little.
>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  mm/zswap.c | 23 +++++------------------
>  1 file changed, 5 insertions(+), 18 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0476e1c553c2..9c709368a0e6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1449,7 +1449,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         struct page *page;
>         struct mempolicy *mpol;
>         bool page_was_allocated;
> -       int ret;
>         struct writeback_control wbc = {
>                 .sync_mode = WB_SYNC_NONE,
>         };
> @@ -1458,16 +1457,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         mpol = get_task_policy(current);
>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> -       if (!page) {
> -               ret = -ENOMEM;
> -               goto fail;
> -       }
> +       if (!page)
> +               return -ENOMEM;
>
>         /* Found an existing page, we raced with load/swapin */
>         if (!page_was_allocated) {
>                 put_page(page);
> -               ret = -EEXIST;
> -               goto fail;
> +               return -EEXIST;
>         }
>
>         /*
> @@ -1481,8 +1477,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
>                 spin_unlock(&tree->lock);
>                 delete_from_swap_cache(page_folio(page));
> -               ret = -ENOMEM;
> -               goto fail;
> +               return -ENOMEM;
>         }
>         spin_unlock(&tree->lock);
>
> @@ -1503,15 +1498,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         __swap_writepage(page, &wbc);
>         put_page(page);
>
> -       return ret;
> -
> -fail:
> -       /*
> -        * If we get here because the page is already in swapcache, a
> -        * load may be happening concurrently. It is safe and okay to
> -        * not free the entry. It is also okay to return !0.
> -        */
> -       return ret;
> +       return 0;
>  }
>
>  static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
>
> --
> b4 0.10.1

LGTM. The fail label was primarily to free the temporary memory. Since
we're re-using dstmem, this isn't really needed anymore. Nice cleanup.
So, FWIW:
Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Yosry Ahmed Dec. 14, 2023, 1:02 a.m. UTC | #2
On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
<zhouchengming@bytedance.com> wrote:
>
> Also after the common decompress part goes to __zswap_load(), we can
> cleanup the zswap_reclaim_entry() a little.

I think you mean zswap_writeback_entry(), same for the commit title.

>
> Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
> ---
>  mm/zswap.c | 23 +++++------------------
>  1 file changed, 5 insertions(+), 18 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 0476e1c553c2..9c709368a0e6 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1449,7 +1449,6 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         struct page *page;
>         struct mempolicy *mpol;
>         bool page_was_allocated;
> -       int ret;
>         struct writeback_control wbc = {
>                 .sync_mode = WB_SYNC_NONE,
>         };
> @@ -1458,16 +1457,13 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         mpol = get_task_policy(current);
>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> -       if (!page) {
> -               ret = -ENOMEM;
> -               goto fail;
> -       }
> +       if (!page)
> +               return -ENOMEM;
>
>         /* Found an existing page, we raced with load/swapin */
>         if (!page_was_allocated) {
>                 put_page(page);
> -               ret = -EEXIST;
> -               goto fail;
> +               return -EEXIST;
>         }
>
>         /*
> @@ -1481,8 +1477,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
>                 spin_unlock(&tree->lock);
>                 delete_from_swap_cache(page_folio(page));
> -               ret = -ENOMEM;
> -               goto fail;
> +               return -ENOMEM;
>         }
>         spin_unlock(&tree->lock);
>
> @@ -1503,15 +1498,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
>         __swap_writepage(page, &wbc);
>         put_page(page);
>
> -       return ret;
> -
> -fail:
> -       /*
> -        * If we get here because the page is already in swapcache, a
> -        * load may be happening concurrently. It is safe and okay to
> -        * not free the entry. It is also okay to return !0.
> -        */

This comment should be moved above the failure check of
__read_swap_cache_async() above, not completely removed. With that:

Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

Feel free to squash this patch into the one creating __zswap_load() or
leaving it as-is.
Andrew Morton Dec. 14, 2023, 10:23 p.m. UTC | #3
On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:

> On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> <zhouchengming@bytedance.com> wrote:
> >
> > Also after the common decompress part goes to __zswap_load(), we can
> > cleanup the zswap_reclaim_entry() a little.
> 
> I think you mean zswap_writeback_entry(), same for the commit title.

I updated my copy of the changelog, thanks.

> > -       /*
> > -        * If we get here because the page is already in swapcache, a
> > -        * load may be happening concurrently. It is safe and okay to
> > -        * not free the entry. It is also okay to return !0.
> > -        */
> 
> This comment should be moved above the failure check of
> __read_swap_cache_async() above, not completely removed.

This?

--- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
+++ a/mm/zswap.c
@@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
 	mpol = get_task_policy(current);
 	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
 				NO_INTERLEAVE_INDEX, &page_was_allocated, true);
-	if (!page)
+	if (!page) {
+		/*
+		 * If we get here because the page is already in swapcache, a
+		 * load may be happening concurrently. It is safe and okay to
+		 * not free the entry. It is also okay to return !0.
+		 */
 		return -ENOMEM;
+	}
 
 	/* Found an existing page, we raced with load/swapin */
 	if (!page_was_allocated) {
Yosry Ahmed Dec. 14, 2023, 10:41 p.m. UTC | #4
On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
>
> > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> > <zhouchengming@bytedance.com> wrote:
> > >
> > > Also after the common decompress part goes to __zswap_load(), we can
> > > cleanup the zswap_reclaim_entry() a little.
> >
> > I think you mean zswap_writeback_entry(), same for the commit title.
>
> I updated my copy of the changelog, thanks.
>
> > > -       /*
> > > -        * If we get here because the page is already in swapcache, a
> > > -        * load may be happening concurrently. It is safe and okay to
> > > -        * not free the entry. It is also okay to return !0.
> > > -        */
> >
> > This comment should be moved above the failure check of
> > __read_swap_cache_async() above, not completely removed.
>
> This?

Yes, thanks a lot. Although I think a new version is needed anyway to
address other comments.

>
> --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
> +++ a/mm/zswap.c
> @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
>         mpol = get_task_policy(current);
>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> -       if (!page)
> +       if (!page) {
> +               /*
> +                * If we get here because the page is already in swapcache, a
> +                * load may be happening concurrently. It is safe and okay to
> +                * not free the entry. It is also okay to return !0.
> +                */
>                 return -ENOMEM;
> +       }
>
>         /* Found an existing page, we raced with load/swapin */
>         if (!page_was_allocated) {
>
Johannes Weiner Dec. 18, 2023, 2:03 p.m. UTC | #5
On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote:
> On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> >
> > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
> >
> > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> > > <zhouchengming@bytedance.com> wrote:
> > > >
> > > > Also after the common decompress part goes to __zswap_load(), we can
> > > > cleanup the zswap_reclaim_entry() a little.
> > >
> > > I think you mean zswap_writeback_entry(), same for the commit title.
> >
> > I updated my copy of the changelog, thanks.
> >
> > > > -       /*
> > > > -        * If we get here because the page is already in swapcache, a
> > > > -        * load may be happening concurrently. It is safe and okay to
> > > > -        * not free the entry. It is also okay to return !0.
> > > > -        */
> > >
> > > This comment should be moved above the failure check of
> > > __read_swap_cache_async() above, not completely removed.
> >
> > This?
> 
> Yes, thanks a lot. Although I think a new version is needed anyway to
> address other comments.
> 
> >
> > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
> > +++ a/mm/zswap.c
> > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
> >         mpol = get_task_policy(current);
> >         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
> >                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> > -       if (!page)
> > +       if (!page) {
> > +               /*
> > +                * If we get here because the page is already in swapcache, a
> > +                * load may be happening concurrently. It is safe and okay to
> > +                * not free the entry. It is also okay to return !0.
> > +                */
> >                 return -ENOMEM;
> > +       }
> >
> >         /* Found an existing page, we raced with load/swapin */
> >         if (!page_was_allocated) {

That's the wrong branch, no?

!page -> -ENOMEM

page && !page_was_allocated -> already in swapcache

Personally, I don't really get the comment. What does it mean that
it's "okay" not to free the entry? There is a put, which may or may
not free the entry if somebody else is using it. Is it explaining how
lifetime works for refcounted objects? I'm similarly confused by the
"it's okay" to return non-zero. What is that trying to convey?

Deletion seemed like the right choice here, IMO ;)
Yosry Ahmed Dec. 18, 2023, 2:39 p.m. UTC | #6
On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote:
> > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> > >
> > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
> > >
> > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> > > > <zhouchengming@bytedance.com> wrote:
> > > > >
> > > > > Also after the common decompress part goes to __zswap_load(), we can
> > > > > cleanup the zswap_reclaim_entry() a little.
> > > >
> > > > I think you mean zswap_writeback_entry(), same for the commit title.
> > >
> > > I updated my copy of the changelog, thanks.
> > >
> > > > > -       /*
> > > > > -        * If we get here because the page is already in swapcache, a
> > > > > -        * load may be happening concurrently. It is safe and okay to
> > > > > -        * not free the entry. It is also okay to return !0.
> > > > > -        */
> > > >
> > > > This comment should be moved above the failure check of
> > > > __read_swap_cache_async() above, not completely removed.
> > >
> > > This?
> >
> > Yes, thanks a lot. Although I think a new version is needed anyway to
> > address other comments.
> >
> > >
> > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
> > > +++ a/mm/zswap.c
> > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
> > >         mpol = get_task_policy(current);
> > >         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
> > >                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> > > -       if (!page)
> > > +       if (!page) {
> > > +               /*
> > > +                * If we get here because the page is already in swapcache, a
> > > +                * load may be happening concurrently. It is safe and okay to
> > > +                * not free the entry. It is also okay to return !0.
> > > +                */
> > >                 return -ENOMEM;
> > > +       }
> > >
> > >         /* Found an existing page, we raced with load/swapin */
> > >         if (!page_was_allocated) {
>
> That's the wrong branch, no?
>
> !page -> -ENOMEM
>
> page && !page_was_allocated -> already in swapcache

Ah yes, my bad.

>
> Personally, I don't really get the comment. What does it mean that
> it's "okay" not to free the entry? There is a put, which may or may
> not free the entry if somebody else is using it. Is it explaining how
> lifetime works for refcounted objects? I'm similarly confused by the
> "it's okay" to return non-zero. What is that trying to convey?
>
> Deletion seemed like the right choice here, IMO ;)

It's not the clearest of comments for sure. I think it is just trying
to say that it is okay not to write back the entry from zswap and to
fail, because the caller will just try another page. I did not like
silently deleting the comment during the refactoring. How about
rewriting it to something like:

/*
 * If we get here because the page is already in the swapcache, a
 * load may be happening concurrently. Skip this page, the caller
 * will move on to a different page.
 */
Johannes Weiner Dec. 18, 2023, 2:58 p.m. UTC | #7
On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote:
> On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
> >
> > On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote:
> > > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> > > >
> > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
> > > >
> > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> > > > > <zhouchengming@bytedance.com> wrote:
> > > > > >
> > > > > > Also after the common decompress part goes to __zswap_load(), we can
> > > > > > cleanup the zswap_reclaim_entry() a little.
> > > > >
> > > > > I think you mean zswap_writeback_entry(), same for the commit title.
> > > >
> > > > I updated my copy of the changelog, thanks.
> > > >
> > > > > > -       /*
> > > > > > -        * If we get here because the page is already in swapcache, a
> > > > > > -        * load may be happening concurrently. It is safe and okay to
> > > > > > -        * not free the entry. It is also okay to return !0.
> > > > > > -        */
> > > > >
> > > > > This comment should be moved above the failure check of
> > > > > __read_swap_cache_async() above, not completely removed.
> > > >
> > > > This?
> > >
> > > Yes, thanks a lot. Although I think a new version is needed anyway to
> > > address other comments.
> > >
> > > >
> > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
> > > > +++ a/mm/zswap.c
> > > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
> > > >         mpol = get_task_policy(current);
> > > >         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
> > > >                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> > > > -       if (!page)
> > > > +       if (!page) {
> > > > +               /*
> > > > +                * If we get here because the page is already in swapcache, a
> > > > +                * load may be happening concurrently. It is safe and okay to
> > > > +                * not free the entry. It is also okay to return !0.
> > > > +                */
> > > >                 return -ENOMEM;
> > > > +       }
> > > >
> > > >         /* Found an existing page, we raced with load/swapin */
> > > >         if (!page_was_allocated) {
> >
> > That's the wrong branch, no?
> >
> > !page -> -ENOMEM
> >
> > page && !page_was_allocated -> already in swapcache
> 
> Ah yes, my bad.
> 
> >
> > Personally, I don't really get the comment. What does it mean that
> > it's "okay" not to free the entry? There is a put, which may or may
> > not free the entry if somebody else is using it. Is it explaining how
> > lifetime works for refcounted objects? I'm similarly confused by the
> > "it's okay" to return non-zero. What is that trying to convey?
> >
> > Deletion seemed like the right choice here, IMO ;)
> 
> It's not the clearest of comments for sure. I think it is just trying
> to say that it is okay not to write back the entry from zswap and to
> fail, because the caller will just try another page. I did not like
> silently deleting the comment during the refactoring. How about
> rewriting it to something like:
> 
> /*
>  * If we get here because the page is already in the swapcache, a
>  * load may be happening concurrently. Skip this page, the caller
>  * will move on to a different page.
>  */

Well there is this one already on the branch:

/* Found an existing page, we raced with load/swapin */

which covers the first half. The unspoken assumption there is that
writeback is an operation for an aged out page, while swapin means the
age just got reset to 0. Maybe it makes sense to elaborate on that?
Yosry Ahmed Dec. 18, 2023, 8:52 p.m. UTC | #8
On Mon, Dec 18, 2023 at 6:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>
> On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote:
> > On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
> > >
> > > On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote:
> > > > On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
> > > > >
> > > > > On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
> > > > >
> > > > > > On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
> > > > > > <zhouchengming@bytedance.com> wrote:
> > > > > > >
> > > > > > > Also after the common decompress part goes to __zswap_load(), we can
> > > > > > > cleanup the zswap_reclaim_entry() a little.
> > > > > >
> > > > > > I think you mean zswap_writeback_entry(), same for the commit title.
> > > > >
> > > > > I updated my copy of the changelog, thanks.
> > > > >
> > > > > > > -       /*
> > > > > > > -        * If we get here because the page is already in swapcache, a
> > > > > > > -        * load may be happening concurrently. It is safe and okay to
> > > > > > > -        * not free the entry. It is also okay to return !0.
> > > > > > > -        */
> > > > > >
> > > > > > This comment should be moved above the failure check of
> > > > > > __read_swap_cache_async() above, not completely removed.
> > > > >
> > > > > This?
> > > >
> > > > Yes, thanks a lot. Although I think a new version is needed anyway to
> > > > address other comments.
> > > >
> > > > >
> > > > > --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
> > > > > +++ a/mm/zswap.c
> > > > > @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
> > > > >         mpol = get_task_policy(current);
> > > > >         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
> > > > >                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
> > > > > -       if (!page)
> > > > > +       if (!page) {
> > > > > +               /*
> > > > > +                * If we get here because the page is already in swapcache, a
> > > > > +                * load may be happening concurrently. It is safe and okay to
> > > > > +                * not free the entry. It is also okay to return !0.
> > > > > +                */
> > > > >                 return -ENOMEM;
> > > > > +       }
> > > > >
> > > > >         /* Found an existing page, we raced with load/swapin */
> > > > >         if (!page_was_allocated) {
> > >
> > > That's the wrong branch, no?
> > >
> > > !page -> -ENOMEM
> > >
> > > page && !page_was_allocated -> already in swapcache
> >
> > Ah yes, my bad.
> >
> > >
> > > Personally, I don't really get the comment. What does it mean that
> > > it's "okay" not to free the entry? There is a put, which may or may
> > > not free the entry if somebody else is using it. Is it explaining how
> > > lifetime works for refcounted objects? I'm similarly confused by the
> > > "it's okay" to return non-zero. What is that trying to convey?
> > >
> > > Deletion seemed like the right choice here, IMO ;)
> >
> > It's not the clearest of comments for sure. I think it is just trying
> > to say that it is okay not to write back the entry from zswap and to
> > fail, because the caller will just try another page. I did not like
> > silently deleting the comment during the refactoring. How about
> > rewriting it to something like:
> >
> > /*
> >  * If we get here because the page is already in the swapcache, a
> >  * load may be happening concurrently. Skip this page, the caller
> >  * will move on to a different page.
> >  */
>
> Well there is this one already on the branch:
>
> /* Found an existing page, we raced with load/swapin */
>
> which covers the first half. The unspoken assumption there is that
> writeback is an operation for an aged out page, while swapin means the
> age just got reset to 0. Maybe it makes sense to elaborate on that?

How about the following diff? This applies on top of Andrew's fix:

diff --git a/mm/zswap.c b/mm/zswap.c
index e8f8f47596dae..8228a0b370979 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct
zswap_entry *entry,
        page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
                                NO_INTERLEAVE_INDEX, &page_was_allocated, true);
        if (!page) {
-               /*
-                * If we get here because the page is already in swapcache, a
-                * load may be happening concurrently. It is safe and okay to
-                * not free the entry. It is also okay to return !0.
-                */
                return -ENOMEM;
        }

-       /* Found an existing page, we raced with load/swapin */
+       /*
+        * Found an existing page, we raced with load/swapin. We generally
+        * writeback cold pages from zswap, and swapin means the page just
+        * became hot. Skip this page and let the caller find another one.
+        */
        if (!page_was_allocated) {
                put_page(page);
                return -EEXIST;
Chengming Zhou Dec. 19, 2023, 12:16 p.m. UTC | #9
On 2023/12/19 04:52, Yosry Ahmed wrote:
> On Mon, Dec 18, 2023 at 6:58 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>>
>> On Mon, Dec 18, 2023 at 06:39:13AM -0800, Yosry Ahmed wrote:
>>> On Mon, Dec 18, 2023 at 6:03 AM Johannes Weiner <hannes@cmpxchg.org> wrote:
>>>>
>>>> On Thu, Dec 14, 2023 at 02:41:26PM -0800, Yosry Ahmed wrote:
>>>>> On Thu, Dec 14, 2023 at 2:23 PM Andrew Morton <akpm@linux-foundation.org> wrote:
>>>>>>
>>>>>> On Wed, 13 Dec 2023 17:02:25 -0800 Yosry Ahmed <yosryahmed@google.com> wrote:
>>>>>>
>>>>>>> On Tue, Dec 12, 2023 at 8:18 PM Chengming Zhou
>>>>>>> <zhouchengming@bytedance.com> wrote:
>>>>>>>>
>>>>>>>> Also after the common decompress part goes to __zswap_load(), we can
>>>>>>>> cleanup the zswap_reclaim_entry() a little.
>>>>>>>
>>>>>>> I think you mean zswap_writeback_entry(), same for the commit title.
>>>>>>
>>>>>> I updated my copy of the changelog, thanks.
>>>>>>
>>>>>>>> -       /*
>>>>>>>> -        * If we get here because the page is already in swapcache, a
>>>>>>>> -        * load may be happening concurrently. It is safe and okay to
>>>>>>>> -        * not free the entry. It is also okay to return !0.
>>>>>>>> -        */
>>>>>>>
>>>>>>> This comment should be moved above the failure check of
>>>>>>> __read_swap_cache_async() above, not completely removed.
>>>>>>
>>>>>> This?
>>>>>
>>>>> Yes, thanks a lot. Although I think a new version is needed anyway to
>>>>> address other comments.
>>>>>
>>>>>>
>>>>>> --- a/mm/zswap.c~mm-zswap-cleanup-zswap_reclaim_entry-fix
>>>>>> +++ a/mm/zswap.c
>>>>>> @@ -1457,8 +1457,14 @@ static int zswap_writeback_entry(struct
>>>>>>         mpol = get_task_policy(current);
>>>>>>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>>>>>>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
>>>>>> -       if (!page)
>>>>>> +       if (!page) {
>>>>>> +               /*
>>>>>> +                * If we get here because the page is already in swapcache, a
>>>>>> +                * load may be happening concurrently. It is safe and okay to
>>>>>> +                * not free the entry. It is also okay to return !0.
>>>>>> +                */
>>>>>>                 return -ENOMEM;
>>>>>> +       }
>>>>>>
>>>>>>         /* Found an existing page, we raced with load/swapin */
>>>>>>         if (!page_was_allocated) {
>>>>
>>>> That's the wrong branch, no?
>>>>
>>>> !page -> -ENOMEM
>>>>
>>>> page && !page_was_allocated -> already in swapcache
>>>
>>> Ah yes, my bad.
>>>
>>>>
>>>> Personally, I don't really get the comment. What does it mean that
>>>> it's "okay" not to free the entry? There is a put, which may or may
>>>> not free the entry if somebody else is using it. Is it explaining how
>>>> lifetime works for refcounted objects? I'm similarly confused by the
>>>> "it's okay" to return non-zero. What is that trying to convey?
>>>>
>>>> Deletion seemed like the right choice here, IMO ;)
>>>
>>> It's not the clearest of comments for sure. I think it is just trying
>>> to say that it is okay not to write back the entry from zswap and to
>>> fail, because the caller will just try another page. I did not like
>>> silently deleting the comment during the refactoring. How about
>>> rewriting it to something like:
>>>
>>> /*
>>>  * If we get here because the page is already in the swapcache, a
>>>  * load may be happening concurrently. Skip this page, the caller
>>>  * will move on to a different page.
>>>  */
>>
>> Well there is this one already on the branch:
>>
>> /* Found an existing page, we raced with load/swapin */
>>
>> which covers the first half. The unspoken assumption there is that
>> writeback is an operation for an aged out page, while swapin means the
>> age just got reset to 0. Maybe it makes sense to elaborate on that?
> 
> How about the following diff? This applies on top of Andrew's fix:
> 

Reviewed-by: Chengming Zhou <zhouchengming@bytedance.com>

The latest v3 also put the comments on the wrong branch, and this diff
could be folded to fix it.

v3: https://lore.kernel.org/all/20231213-zswap-dstmem-v3-5-4eac09b94ece@bytedance.com/

> diff --git a/mm/zswap.c b/mm/zswap.c
> index e8f8f47596dae..8228a0b370979 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct
> zswap_entry *entry,
>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
>         if (!page) {
> -               /*
> -                * If we get here because the page is already in swapcache, a
> -                * load may be happening concurrently. It is safe and okay to
> -                * not free the entry. It is also okay to return !0.
> -                */
>                 return -ENOMEM;
>         }
> 
> -       /* Found an existing page, we raced with load/swapin */
> +       /*
> +        * Found an existing page, we raced with load/swapin. We generally
> +        * writeback cold pages from zswap, and swapin means the page just
> +        * became hot. Skip this page and let the caller find another one.
> +        */
>         if (!page_was_allocated) {
>                 put_page(page);
>                 return -EEXIST;
Johannes Weiner Dec. 20, 2023, 4:30 a.m. UTC | #10
On Mon, Dec 18, 2023 at 12:52:40PM -0800, Yosry Ahmed wrote:
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1458,15 +1458,14 @@ static int zswap_writeback_entry(struct
> zswap_entry *entry,
>         page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
>                                 NO_INTERLEAVE_INDEX, &page_was_allocated, true);
>         if (!page) {
> -               /*
> -                * If we get here because the page is already in swapcache, a
> -                * load may be happening concurrently. It is safe and okay to
> -                * not free the entry. It is also okay to return !0.
> -                */
>                 return -ENOMEM;
>         }
> 
> -       /* Found an existing page, we raced with load/swapin */
> +       /*
> +        * Found an existing page, we raced with load/swapin. We generally
> +        * writeback cold pages from zswap, and swapin means the page just
> +        * became hot. Skip this page and let the caller find another one.
> +        */
>         if (!page_was_allocated) {
>                 put_page(page);
>                 return -EEXIST;

ACK, that looks good to me!

Thanks
diff mbox series

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index 0476e1c553c2..9c709368a0e6 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1449,7 +1449,6 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 	struct page *page;
 	struct mempolicy *mpol;
 	bool page_was_allocated;
-	int ret;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 	};
@@ -1458,16 +1457,13 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 	mpol = get_task_policy(current);
 	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
 				NO_INTERLEAVE_INDEX, &page_was_allocated, true);
-	if (!page) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!page)
+		return -ENOMEM;
 
 	/* Found an existing page, we raced with load/swapin */
 	if (!page_was_allocated) {
 		put_page(page);
-		ret = -EEXIST;
-		goto fail;
+		return -EEXIST;
 	}
 
 	/*
@@ -1481,8 +1477,7 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 	if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
 		spin_unlock(&tree->lock);
 		delete_from_swap_cache(page_folio(page));
-		ret = -ENOMEM;
-		goto fail;
+		return -ENOMEM;
 	}
 	spin_unlock(&tree->lock);
 
@@ -1503,15 +1498,7 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 	__swap_writepage(page, &wbc);
 	put_page(page);
 
-	return ret;
-
-fail:
-	/*
-	 * If we get here because the page is already in swapcache, a
-	 * load may be happening concurrently. It is safe and okay to
-	 * not free the entry. It is also okay to return !0.
-	 */
-	return ret;
+	return 0;
 }
 
 static int zswap_is_page_same_filled(void *ptr, unsigned long *value)