diff mbox series

[3/4] mm/swap_cgroup: simplify swap cgroup definitions

Message ID 20241202184154.19321-4-ryncsn@gmail.com (mailing list archive)
State New
Headers show
Series mm/swap_cgroup: remove global swap cgroup lock | expand

Commit Message

Kairui Song Dec. 2, 2024, 6:41 p.m. UTC
From: Kairui Song <kasong@tencent.com>

Remove the intermediate struct swap_cgroup, it just a unsigned short
wrapper, simplify the code.

Also zero the map on initialization to prevent unexpected behaviour as
swap cgroup helpers are suppose to return 0 on error.

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 mm/swap_cgroup.c | 45 +++++++++++++++++++--------------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

Comments

Yosry Ahmed Dec. 2, 2024, 7:25 p.m. UTC | #1
On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> From: Kairui Song <kasong@tencent.com>
>
> Remove the intermediate struct swap_cgroup, it just a unsigned short
> wrapper, simplify the code.

Did you actually remove the struct? It doesn't seem like it.

>
> Also zero the map on initialization to prevent unexpected behaviour as
> swap cgroup helpers are suppose to return 0 on error.

All the callers lookup the id of an already swapped out page, so it
should never be uninitialized. Maybe we should WARN if the result of
the lookup is 0 in this case?

>
> Signed-off-by: Kairui Song <kasong@tencent.com>
> ---
>  mm/swap_cgroup.c | 45 +++++++++++++++++++--------------------------
>  1 file changed, 19 insertions(+), 26 deletions(-)
>
> diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> index 1770b076f6b7..a76afdc3666a 100644
> --- a/mm/swap_cgroup.c
> +++ b/mm/swap_cgroup.c
> @@ -12,14 +12,12 @@ struct swap_cgroup {
>  };
>
>  struct swap_cgroup_ctrl {
> -       struct swap_cgroup *map;
> +       unsigned short  *map;
>         spinlock_t      lock;
>  };
>
>  static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
>
> -#define SC_PER_PAGE    (PAGE_SIZE/sizeof(struct swap_cgroup))
> -
>  /*
>   * SwapCgroup implements "lookup" and "exchange" operations.
>   * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
> @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
>   *
>   * TODO: we can push these buffers out to HIGHMEM.
>   */
> -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
> -                                       struct swap_cgroup_ctrl **ctrlp)
> -{
> -       pgoff_t offset = swp_offset(ent);
> -       struct swap_cgroup_ctrl *ctrl;
> -
> -       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> -       if (ctrlp)
> -               *ctrlp = ctrl;
> -       return &ctrl->map[offset];
> -}
> -
>  /**
>   * swap_cgroup_record - record mem_cgroup for a set of swap entries
>   * @ent: the first swap entry to be recorded into
> @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
>                                   unsigned int nr_ents)
>  {
>         struct swap_cgroup_ctrl *ctrl;
> -       struct swap_cgroup *sc;
> +       unsigned short *map;
>         unsigned short old;
>         unsigned long flags;
>         pgoff_t offset = swp_offset(ent);
>         pgoff_t end = offset + nr_ents;
>
> -       sc = lookup_swap_cgroup(ent, &ctrl);
> +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> +       map = ctrl->map;
>
>         spin_lock_irqsave(&ctrl->lock, flags);
> -       old = sc->id;
> -       for (; offset < end; offset++, sc++) {
> -               VM_BUG_ON(sc->id != old);
> -               sc->id = id;
> -       }
> +       old = map[offset];
> +       do {
> +               VM_BUG_ON(map[offset] != old);
> +               map[offset] = id;
> +       } while (++offset != end);

Why did you change the for loop here?

>         spin_unlock_irqrestore(&ctrl->lock, flags);
>
>         return old;
> @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
>   */
>  unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
>  {
> +       struct swap_cgroup_ctrl *ctrl;
> +
>         if (mem_cgroup_disabled())
>                 return 0;
> -       return lookup_swap_cgroup(ent, NULL)->id;
> +
> +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> +       pgoff_t offset = swp_offset(ent);
> +
> +       return READ_ONCE(ctrl->map[offset]);

The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed?

>  }
>
>  int swap_cgroup_swapon(int type, unsigned long max_pages)
>  {
> -       struct swap_cgroup *map;
> +       void *map;
>         struct swap_cgroup_ctrl *ctrl;
>
>         if (mem_cgroup_disabled())
>                 return 0;
>
> -       map = vcalloc(max_pages, sizeof(struct swap_cgroup));
> +       map = vzalloc(max_pages * sizeof(unsigned short));
>         if (!map)
>                 goto nomem;
>
> @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
>
>  void swap_cgroup_swapoff(int type)
>  {
> -       struct swap_cgroup *map;
> +       void *map;

Why void?

>         struct swap_cgroup_ctrl *ctrl;
>
>         if (mem_cgroup_disabled())
> --
> 2.47.0
>
Roman Gushchin Dec. 2, 2024, 10:34 p.m. UTC | #2
On Tue, Dec 03, 2024 at 02:41:53AM +0800, Kairui Song wrote:
> From: Kairui Song <kasong@tencent.com>
> 
> Remove the intermediate struct swap_cgroup, it just a unsigned short
> wrapper, simplify the code.
> 
> Also zero the map on initialization to prevent unexpected behaviour as
> swap cgroup helpers are suppose to return 0 on error.
> 
> Signed-off-by: Kairui Song <kasong@tencent.com>

Please, merge this one into the next one, so it's easier to follow
how the end result looks like.

It seems like v2 is coming for the next patch in any case.

Thanks!
Chris Li Dec. 4, 2024, 9:14 p.m. UTC | #3
On Mon, Dec 2, 2024 at 11:26 AM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > Remove the intermediate struct swap_cgroup, it just a unsigned short
> > wrapper, simplify the code.
>
> Did you actually remove the struct? It doesn't seem like it.

Right, I did not see the struct get removed either.

Anyway I will wait for the V2 to review again.

Chris

>
> >
> > Also zero the map on initialization to prevent unexpected behaviour as
> > swap cgroup helpers are suppose to return 0 on error.
>
> All the callers lookup the id of an already swapped out page, so it
> should never be uninitialized. Maybe we should WARN if the result of
> the lookup is 0 in this case?
>
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > ---
> >  mm/swap_cgroup.c | 45 +++++++++++++++++++--------------------------
> >  1 file changed, 19 insertions(+), 26 deletions(-)
> >
> > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> > index 1770b076f6b7..a76afdc3666a 100644
> > --- a/mm/swap_cgroup.c
> > +++ b/mm/swap_cgroup.c
> > @@ -12,14 +12,12 @@ struct swap_cgroup {
> >  };
> >
> >  struct swap_cgroup_ctrl {
> > -       struct swap_cgroup *map;
> > +       unsigned short  *map;
> >         spinlock_t      lock;
> >  };
> >
> >  static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
> >
> > -#define SC_PER_PAGE    (PAGE_SIZE/sizeof(struct swap_cgroup))
> > -
> >  /*
> >   * SwapCgroup implements "lookup" and "exchange" operations.
> >   * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
> > @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
> >   *
> >   * TODO: we can push these buffers out to HIGHMEM.
> >   */
> > -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
> > -                                       struct swap_cgroup_ctrl **ctrlp)
> > -{
> > -       pgoff_t offset = swp_offset(ent);
> > -       struct swap_cgroup_ctrl *ctrl;
> > -
> > -       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > -       if (ctrlp)
> > -               *ctrlp = ctrl;
> > -       return &ctrl->map[offset];
> > -}
> > -
> >  /**
> >   * swap_cgroup_record - record mem_cgroup for a set of swap entries
> >   * @ent: the first swap entry to be recorded into
> > @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
> >                                   unsigned int nr_ents)
> >  {
> >         struct swap_cgroup_ctrl *ctrl;
> > -       struct swap_cgroup *sc;
> > +       unsigned short *map;
> >         unsigned short old;
> >         unsigned long flags;
> >         pgoff_t offset = swp_offset(ent);
> >         pgoff_t end = offset + nr_ents;
> >
> > -       sc = lookup_swap_cgroup(ent, &ctrl);
> > +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > +       map = ctrl->map;
> >
> >         spin_lock_irqsave(&ctrl->lock, flags);
> > -       old = sc->id;
> > -       for (; offset < end; offset++, sc++) {
> > -               VM_BUG_ON(sc->id != old);
> > -               sc->id = id;
> > -       }
> > +       old = map[offset];
> > +       do {
> > +               VM_BUG_ON(map[offset] != old);
> > +               map[offset] = id;
> > +       } while (++offset != end);
>
> Why did you change the for loop here?
>
> >         spin_unlock_irqrestore(&ctrl->lock, flags);
> >
> >         return old;
> > @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
> >   */
> >  unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
> >  {
> > +       struct swap_cgroup_ctrl *ctrl;
> > +
> >         if (mem_cgroup_disabled())
> >                 return 0;
> > -       return lookup_swap_cgroup(ent, NULL)->id;
> > +
> > +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > +       pgoff_t offset = swp_offset(ent);
> > +
> > +       return READ_ONCE(ctrl->map[offset]);
>
> The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed?
>
> >  }
> >
> >  int swap_cgroup_swapon(int type, unsigned long max_pages)
> >  {
> > -       struct swap_cgroup *map;
> > +       void *map;
> >         struct swap_cgroup_ctrl *ctrl;
> >
> >         if (mem_cgroup_disabled())
> >                 return 0;
> >
> > -       map = vcalloc(max_pages, sizeof(struct swap_cgroup));
> > +       map = vzalloc(max_pages * sizeof(unsigned short));
> >         if (!map)
> >                 goto nomem;
> >
> > @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
> >
> >  void swap_cgroup_swapoff(int type)
> >  {
> > -       struct swap_cgroup *map;
> > +       void *map;
>
> Why void?
>
> >         struct swap_cgroup_ctrl *ctrl;
> >
> >         if (mem_cgroup_disabled())
> > --
> > 2.47.0
> >
Kairui Song Dec. 10, 2024, 8:15 a.m. UTC | #4
On Tue, Dec 3, 2024 at 3:26 AM Yosry Ahmed <yosryahmed@google.com> wrote:
>
> On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote:
> >
> > From: Kairui Song <kasong@tencent.com>
> >
> > Remove the intermediate struct swap_cgroup, it just a unsigned short
> > wrapper, simplify the code.
>
> Did you actually remove the struct? It doesn't seem like it.

Oops, I forgot to drop these lines of code indeed, I'll send V2
merging this into patch 4/4 so this commit can be ignored for now.

>
> >
> > Also zero the map on initialization to prevent unexpected behaviour as
> > swap cgroup helpers are suppose to return 0 on error.
>
> All the callers lookup the id of an already swapped out page, so it
> should never be uninitialized. Maybe we should WARN if the result of
> the lookup is 0 in this case?

Yes, just a fallback for robustness.

Lookup returning 0 is expected in some cases, eg. Cgroup V1 will call
mem_cgroup_swapin_uncharge_swap early when the folio is added to swap
cache, and erase the record. Then the mem_cgroup_uncharge_swap in
swapfile.c (called when the swap cache is being dropped and entry is
freed) won't double uncharge it. So we can't WARN on that.

>
> >
> > Signed-off-by: Kairui Song <kasong@tencent.com>
> > ---
> >  mm/swap_cgroup.c | 45 +++++++++++++++++++--------------------------
> >  1 file changed, 19 insertions(+), 26 deletions(-)
> >
> > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> > index 1770b076f6b7..a76afdc3666a 100644
> > --- a/mm/swap_cgroup.c
> > +++ b/mm/swap_cgroup.c
> > @@ -12,14 +12,12 @@ struct swap_cgroup {
> >  };
> >
> >  struct swap_cgroup_ctrl {
> > -       struct swap_cgroup *map;
> > +       unsigned short  *map;
> >         spinlock_t      lock;
> >  };
> >
> >  static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
> >
> > -#define SC_PER_PAGE    (PAGE_SIZE/sizeof(struct swap_cgroup))
> > -
> >  /*
> >   * SwapCgroup implements "lookup" and "exchange" operations.
> >   * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
> > @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
> >   *
> >   * TODO: we can push these buffers out to HIGHMEM.
> >   */
> > -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
> > -                                       struct swap_cgroup_ctrl **ctrlp)
> > -{
> > -       pgoff_t offset = swp_offset(ent);
> > -       struct swap_cgroup_ctrl *ctrl;
> > -
> > -       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > -       if (ctrlp)
> > -               *ctrlp = ctrl;
> > -       return &ctrl->map[offset];
> > -}
> > -
> >  /**
> >   * swap_cgroup_record - record mem_cgroup for a set of swap entries
> >   * @ent: the first swap entry to be recorded into
> > @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
> >                                   unsigned int nr_ents)
> >  {
> >         struct swap_cgroup_ctrl *ctrl;
> > -       struct swap_cgroup *sc;
> > +       unsigned short *map;
> >         unsigned short old;
> >         unsigned long flags;
> >         pgoff_t offset = swp_offset(ent);
> >         pgoff_t end = offset + nr_ents;
> >
> > -       sc = lookup_swap_cgroup(ent, &ctrl);
> > +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > +       map = ctrl->map;
> >
> >         spin_lock_irqsave(&ctrl->lock, flags);
> > -       old = sc->id;
> > -       for (; offset < end; offset++, sc++) {
> > -               VM_BUG_ON(sc->id != old);
> > -               sc->id = id;
> > -       }
> > +       old = map[offset];
> > +       do {
> > +               VM_BUG_ON(map[offset] != old);
> > +               map[offset] = id;
> > +       } while (++offset != end);
>
> Why did you change the for loop here?
>
> >         spin_unlock_irqrestore(&ctrl->lock, flags);
> >
> >         return old;
> > @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
> >   */
> >  unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
> >  {
> > +       struct swap_cgroup_ctrl *ctrl;
> > +
> >         if (mem_cgroup_disabled())
> >                 return 0;
> > -       return lookup_swap_cgroup(ent, NULL)->id;
> > +
> > +       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
> > +       pgoff_t offset = swp_offset(ent);
> > +
> > +       return READ_ONCE(ctrl->map[offset]);
>
> The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed?
>
> >  }
> >
> >  int swap_cgroup_swapon(int type, unsigned long max_pages)
> >  {
> > -       struct swap_cgroup *map;
> > +       void *map;
> >         struct swap_cgroup_ctrl *ctrl;
> >
> >         if (mem_cgroup_disabled())
> >                 return 0;
> >
> > -       map = vcalloc(max_pages, sizeof(struct swap_cgroup));
> > +       map = vzalloc(max_pages * sizeof(unsigned short));
> >         if (!map)
> >                 goto nomem;
> >
> > @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
> >
> >  void swap_cgroup_swapoff(int type)
> >  {
> > -       struct swap_cgroup *map;
> > +       void *map;
>
> Why void?
>
> >         struct swap_cgroup_ctrl *ctrl;
> >
> >         if (mem_cgroup_disabled())
> > --
> > 2.47.0
> >
>
diff mbox series

Patch

diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 1770b076f6b7..a76afdc3666a 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -12,14 +12,12 @@  struct swap_cgroup {
 };
 
 struct swap_cgroup_ctrl {
-	struct swap_cgroup *map;
+	unsigned short	*map;
 	spinlock_t	lock;
 };
 
 static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
 
-#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
-
 /*
  * SwapCgroup implements "lookup" and "exchange" operations.
  * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
@@ -33,18 +31,6 @@  static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
  *
  * TODO: we can push these buffers out to HIGHMEM.
  */
-static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
-					struct swap_cgroup_ctrl **ctrlp)
-{
-	pgoff_t offset = swp_offset(ent);
-	struct swap_cgroup_ctrl *ctrl;
-
-	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
-	if (ctrlp)
-		*ctrlp = ctrl;
-	return &ctrl->map[offset];
-}
-
 /**
  * swap_cgroup_record - record mem_cgroup for a set of swap entries
  * @ent: the first swap entry to be recorded into
@@ -58,20 +44,21 @@  unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
 				  unsigned int nr_ents)
 {
 	struct swap_cgroup_ctrl *ctrl;
-	struct swap_cgroup *sc;
+	unsigned short *map;
 	unsigned short old;
 	unsigned long flags;
 	pgoff_t offset = swp_offset(ent);
 	pgoff_t end = offset + nr_ents;
 
-	sc = lookup_swap_cgroup(ent, &ctrl);
+	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+	map = ctrl->map;
 
 	spin_lock_irqsave(&ctrl->lock, flags);
-	old = sc->id;
-	for (; offset < end; offset++, sc++) {
-		VM_BUG_ON(sc->id != old);
-		sc->id = id;
-	}
+	old = map[offset];
+	do {
+		VM_BUG_ON(map[offset] != old);
+		map[offset] = id;
+	} while (++offset != end);
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 
 	return old;
@@ -85,20 +72,26 @@  unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
  */
 unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
+	struct swap_cgroup_ctrl *ctrl;
+
 	if (mem_cgroup_disabled())
 		return 0;
-	return lookup_swap_cgroup(ent, NULL)->id;
+
+	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+	pgoff_t offset = swp_offset(ent);
+
+	return READ_ONCE(ctrl->map[offset]);
 }
 
 int swap_cgroup_swapon(int type, unsigned long max_pages)
 {
-	struct swap_cgroup *map;
+	void *map;
 	struct swap_cgroup_ctrl *ctrl;
 
 	if (mem_cgroup_disabled())
 		return 0;
 
-	map = vcalloc(max_pages, sizeof(struct swap_cgroup));
+	map = vzalloc(max_pages * sizeof(unsigned short));
 	if (!map)
 		goto nomem;
 
@@ -117,7 +110,7 @@  int swap_cgroup_swapon(int type, unsigned long max_pages)
 
 void swap_cgroup_swapoff(int type)
 {
-	struct swap_cgroup *map;
+	void *map;
 	struct swap_cgroup_ctrl *ctrl;
 
 	if (mem_cgroup_disabled())