Message ID | 20241202184154.19321-4-ryncsn@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/swap_cgroup: remove global swap cgroup lock | expand |
On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote: > > From: Kairui Song <kasong@tencent.com> > > Remove the intermediate struct swap_cgroup, it just a unsigned short > wrapper, simplify the code. Did you actually remove the struct? It doesn't seem like it. > > Also zero the map on initialization to prevent unexpected behaviour as > swap cgroup helpers are suppose to return 0 on error. All the callers lookup the id of an already swapped out page, so it should never be uninitialized. Maybe we should WARN if the result of the lookup is 0 in this case? > > Signed-off-by: Kairui Song <kasong@tencent.com> > --- > mm/swap_cgroup.c | 45 +++++++++++++++++++-------------------------- > 1 file changed, 19 insertions(+), 26 deletions(-) > > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > index 1770b076f6b7..a76afdc3666a 100644 > --- a/mm/swap_cgroup.c > +++ b/mm/swap_cgroup.c > @@ -12,14 +12,12 @@ struct swap_cgroup { > }; > > struct swap_cgroup_ctrl { > - struct swap_cgroup *map; > + unsigned short *map; > spinlock_t lock; > }; > > static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > > -#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) > - > /* > * SwapCgroup implements "lookup" and "exchange" operations. > * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge > @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > * > * TODO: we can push these buffers out to HIGHMEM. > */ > -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, > - struct swap_cgroup_ctrl **ctrlp) > -{ > - pgoff_t offset = swp_offset(ent); > - struct swap_cgroup_ctrl *ctrl; > - > - ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > - if (ctrlp) > - *ctrlp = ctrl; > - return &ctrl->map[offset]; > -} > - > /** > * swap_cgroup_record - record mem_cgroup for a set of swap entries > * @ent: the first swap entry to be recorded into > @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > unsigned int nr_ents) > { > struct swap_cgroup_ctrl *ctrl; > - struct swap_cgroup *sc; > + unsigned short *map; > unsigned short old; > unsigned long flags; > pgoff_t offset = swp_offset(ent); > pgoff_t end = offset + nr_ents; > > - sc = lookup_swap_cgroup(ent, &ctrl); > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > + map = ctrl->map; > > spin_lock_irqsave(&ctrl->lock, flags); > - old = sc->id; > - for (; offset < end; offset++, sc++) { > - VM_BUG_ON(sc->id != old); > - sc->id = id; > - } > + old = map[offset]; > + do { > + VM_BUG_ON(map[offset] != old); > + map[offset] = id; > + } while (++offset != end); Why did you change the for loop here? > spin_unlock_irqrestore(&ctrl->lock, flags); > > return old; > @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > */ > unsigned short lookup_swap_cgroup_id(swp_entry_t ent) > { > + struct swap_cgroup_ctrl *ctrl; > + > if (mem_cgroup_disabled()) > return 0; > - return lookup_swap_cgroup(ent, NULL)->id; > + > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > + pgoff_t offset = swp_offset(ent); > + > + return READ_ONCE(ctrl->map[offset]); The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed? > } > > int swap_cgroup_swapon(int type, unsigned long max_pages) > { > - struct swap_cgroup *map; > + void *map; > struct swap_cgroup_ctrl *ctrl; > > if (mem_cgroup_disabled()) > return 0; > > - map = vcalloc(max_pages, sizeof(struct swap_cgroup)); > + map = vzalloc(max_pages * sizeof(unsigned short)); > if (!map) > goto nomem; > > @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) > > void swap_cgroup_swapoff(int type) > { > - struct swap_cgroup *map; > + void *map; Why void? > struct swap_cgroup_ctrl *ctrl; > > if (mem_cgroup_disabled()) > -- > 2.47.0 >
On Tue, Dec 03, 2024 at 02:41:53AM +0800, Kairui Song wrote: > From: Kairui Song <kasong@tencent.com> > > Remove the intermediate struct swap_cgroup, it just a unsigned short > wrapper, simplify the code. > > Also zero the map on initialization to prevent unexpected behaviour as > swap cgroup helpers are suppose to return 0 on error. > > Signed-off-by: Kairui Song <kasong@tencent.com> Please, merge this one into the next one, so it's easier to follow how the end result looks like. It seems like v2 is coming for the next patch in any case. Thanks!
On Mon, Dec 2, 2024 at 11:26 AM Yosry Ahmed <yosryahmed@google.com> wrote: > > On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote: > > > > From: Kairui Song <kasong@tencent.com> > > > > Remove the intermediate struct swap_cgroup, it just a unsigned short > > wrapper, simplify the code. > > Did you actually remove the struct? It doesn't seem like it. Right, I did not see the struct get removed either. Anyway I will wait for the V2 to review again. Chris > > > > > Also zero the map on initialization to prevent unexpected behaviour as > > swap cgroup helpers are suppose to return 0 on error. > > All the callers lookup the id of an already swapped out page, so it > should never be uninitialized. Maybe we should WARN if the result of > the lookup is 0 in this case? > > > > > Signed-off-by: Kairui Song <kasong@tencent.com> > > --- > > mm/swap_cgroup.c | 45 +++++++++++++++++++-------------------------- > > 1 file changed, 19 insertions(+), 26 deletions(-) > > > > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > > index 1770b076f6b7..a76afdc3666a 100644 > > --- a/mm/swap_cgroup.c > > +++ b/mm/swap_cgroup.c > > @@ -12,14 +12,12 @@ struct swap_cgroup { > > }; > > > > struct swap_cgroup_ctrl { > > - struct swap_cgroup *map; > > + unsigned short *map; > > spinlock_t lock; > > }; > > > > static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > > > > -#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) > > - > > /* > > * SwapCgroup implements "lookup" and "exchange" operations. > > * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge > > @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > > * > > * TODO: we can push these buffers out to HIGHMEM. > > */ > > -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, > > - struct swap_cgroup_ctrl **ctrlp) > > -{ > > - pgoff_t offset = swp_offset(ent); > > - struct swap_cgroup_ctrl *ctrl; > > - > > - ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > - if (ctrlp) > > - *ctrlp = ctrl; > > - return &ctrl->map[offset]; > > -} > > - > > /** > > * swap_cgroup_record - record mem_cgroup for a set of swap entries > > * @ent: the first swap entry to be recorded into > > @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > > unsigned int nr_ents) > > { > > struct swap_cgroup_ctrl *ctrl; > > - struct swap_cgroup *sc; > > + unsigned short *map; > > unsigned short old; > > unsigned long flags; > > pgoff_t offset = swp_offset(ent); > > pgoff_t end = offset + nr_ents; > > > > - sc = lookup_swap_cgroup(ent, &ctrl); > > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > + map = ctrl->map; > > > > spin_lock_irqsave(&ctrl->lock, flags); > > - old = sc->id; > > - for (; offset < end; offset++, sc++) { > > - VM_BUG_ON(sc->id != old); > > - sc->id = id; > > - } > > + old = map[offset]; > > + do { > > + VM_BUG_ON(map[offset] != old); > > + map[offset] = id; > > + } while (++offset != end); > > Why did you change the for loop here? > > > spin_unlock_irqrestore(&ctrl->lock, flags); > > > > return old; > > @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > > */ > > unsigned short lookup_swap_cgroup_id(swp_entry_t ent) > > { > > + struct swap_cgroup_ctrl *ctrl; > > + > > if (mem_cgroup_disabled()) > > return 0; > > - return lookup_swap_cgroup(ent, NULL)->id; > > + > > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > + pgoff_t offset = swp_offset(ent); > > + > > + return READ_ONCE(ctrl->map[offset]); > > The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed? > > > } > > > > int swap_cgroup_swapon(int type, unsigned long max_pages) > > { > > - struct swap_cgroup *map; > > + void *map; > > struct swap_cgroup_ctrl *ctrl; > > > > if (mem_cgroup_disabled()) > > return 0; > > > > - map = vcalloc(max_pages, sizeof(struct swap_cgroup)); > > + map = vzalloc(max_pages * sizeof(unsigned short)); > > if (!map) > > goto nomem; > > > > @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) > > > > void swap_cgroup_swapoff(int type) > > { > > - struct swap_cgroup *map; > > + void *map; > > Why void? > > > struct swap_cgroup_ctrl *ctrl; > > > > if (mem_cgroup_disabled()) > > -- > > 2.47.0 > >
On Tue, Dec 3, 2024 at 3:26 AM Yosry Ahmed <yosryahmed@google.com> wrote: > > On Mon, Dec 2, 2024 at 10:42 AM Kairui Song <ryncsn@gmail.com> wrote: > > > > From: Kairui Song <kasong@tencent.com> > > > > Remove the intermediate struct swap_cgroup, it just a unsigned short > > wrapper, simplify the code. > > Did you actually remove the struct? It doesn't seem like it. Oops, I forgot to drop these lines of code indeed, I'll send V2 merging this into patch 4/4 so this commit can be ignored for now. > > > > > Also zero the map on initialization to prevent unexpected behaviour as > > swap cgroup helpers are suppose to return 0 on error. > > All the callers lookup the id of an already swapped out page, so it > should never be uninitialized. Maybe we should WARN if the result of > the lookup is 0 in this case? Yes, just a fallback for robustness. Lookup returning 0 is expected in some cases, eg. Cgroup V1 will call mem_cgroup_swapin_uncharge_swap early when the folio is added to swap cache, and erase the record. Then the mem_cgroup_uncharge_swap in swapfile.c (called when the swap cache is being dropped and entry is freed) won't double uncharge it. So we can't WARN on that. > > > > > Signed-off-by: Kairui Song <kasong@tencent.com> > > --- > > mm/swap_cgroup.c | 45 +++++++++++++++++++-------------------------- > > 1 file changed, 19 insertions(+), 26 deletions(-) > > > > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > > index 1770b076f6b7..a76afdc3666a 100644 > > --- a/mm/swap_cgroup.c > > +++ b/mm/swap_cgroup.c > > @@ -12,14 +12,12 @@ struct swap_cgroup { > > }; > > > > struct swap_cgroup_ctrl { > > - struct swap_cgroup *map; > > + unsigned short *map; > > spinlock_t lock; > > }; > > > > static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > > > > -#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) > > - > > /* > > * SwapCgroup implements "lookup" and "exchange" operations. > > * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge > > @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; > > * > > * TODO: we can push these buffers out to HIGHMEM. > > */ > > -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, > > - struct swap_cgroup_ctrl **ctrlp) > > -{ > > - pgoff_t offset = swp_offset(ent); > > - struct swap_cgroup_ctrl *ctrl; > > - > > - ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > - if (ctrlp) > > - *ctrlp = ctrl; > > - return &ctrl->map[offset]; > > -} > > - > > /** > > * swap_cgroup_record - record mem_cgroup for a set of swap entries > > * @ent: the first swap entry to be recorded into > > @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > > unsigned int nr_ents) > > { > > struct swap_cgroup_ctrl *ctrl; > > - struct swap_cgroup *sc; > > + unsigned short *map; > > unsigned short old; > > unsigned long flags; > > pgoff_t offset = swp_offset(ent); > > pgoff_t end = offset + nr_ents; > > > > - sc = lookup_swap_cgroup(ent, &ctrl); > > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > + map = ctrl->map; > > > > spin_lock_irqsave(&ctrl->lock, flags); > > - old = sc->id; > > - for (; offset < end; offset++, sc++) { > > - VM_BUG_ON(sc->id != old); > > - sc->id = id; > > - } > > + old = map[offset]; > > + do { > > + VM_BUG_ON(map[offset] != old); > > + map[offset] = id; > > + } while (++offset != end); > > Why did you change the for loop here? > > > spin_unlock_irqrestore(&ctrl->lock, flags); > > > > return old; > > @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, > > */ > > unsigned short lookup_swap_cgroup_id(swp_entry_t ent) > > { > > + struct swap_cgroup_ctrl *ctrl; > > + > > if (mem_cgroup_disabled()) > > return 0; > > - return lookup_swap_cgroup(ent, NULL)->id; > > + > > + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; > > + pgoff_t offset = swp_offset(ent); > > + > > + return READ_ONCE(ctrl->map[offset]); > > The READ_ONCE() does not exist today in lookup_swap_cgroup(). Why is it needed? > > > } > > > > int swap_cgroup_swapon(int type, unsigned long max_pages) > > { > > - struct swap_cgroup *map; > > + void *map; > > struct swap_cgroup_ctrl *ctrl; > > > > if (mem_cgroup_disabled()) > > return 0; > > > > - map = vcalloc(max_pages, sizeof(struct swap_cgroup)); > > + map = vzalloc(max_pages * sizeof(unsigned short)); > > if (!map) > > goto nomem; > > > > @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) > > > > void swap_cgroup_swapoff(int type) > > { > > - struct swap_cgroup *map; > > + void *map; > > Why void? > > > struct swap_cgroup_ctrl *ctrl; > > > > if (mem_cgroup_disabled()) > > -- > > 2.47.0 > > >
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index 1770b076f6b7..a76afdc3666a 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -12,14 +12,12 @@ struct swap_cgroup { }; struct swap_cgroup_ctrl { - struct swap_cgroup *map; + unsigned short *map; spinlock_t lock; }; static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; -#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) - /* * SwapCgroup implements "lookup" and "exchange" operations. * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge @@ -33,18 +31,6 @@ static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; * * TODO: we can push these buffers out to HIGHMEM. */ -static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent, - struct swap_cgroup_ctrl **ctrlp) -{ - pgoff_t offset = swp_offset(ent); - struct swap_cgroup_ctrl *ctrl; - - ctrl = &swap_cgroup_ctrl[swp_type(ent)]; - if (ctrlp) - *ctrlp = ctrl; - return &ctrl->map[offset]; -} - /** * swap_cgroup_record - record mem_cgroup for a set of swap entries * @ent: the first swap entry to be recorded into @@ -58,20 +44,21 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, unsigned int nr_ents) { struct swap_cgroup_ctrl *ctrl; - struct swap_cgroup *sc; + unsigned short *map; unsigned short old; unsigned long flags; pgoff_t offset = swp_offset(ent); pgoff_t end = offset + nr_ents; - sc = lookup_swap_cgroup(ent, &ctrl); + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; + map = ctrl->map; spin_lock_irqsave(&ctrl->lock, flags); - old = sc->id; - for (; offset < end; offset++, sc++) { - VM_BUG_ON(sc->id != old); - sc->id = id; - } + old = map[offset]; + do { + VM_BUG_ON(map[offset] != old); + map[offset] = id; + } while (++offset != end); spin_unlock_irqrestore(&ctrl->lock, flags); return old; @@ -85,20 +72,26 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id, */ unsigned short lookup_swap_cgroup_id(swp_entry_t ent) { + struct swap_cgroup_ctrl *ctrl; + if (mem_cgroup_disabled()) return 0; - return lookup_swap_cgroup(ent, NULL)->id; + + ctrl = &swap_cgroup_ctrl[swp_type(ent)]; + pgoff_t offset = swp_offset(ent); + + return READ_ONCE(ctrl->map[offset]); } int swap_cgroup_swapon(int type, unsigned long max_pages) { - struct swap_cgroup *map; + void *map; struct swap_cgroup_ctrl *ctrl; if (mem_cgroup_disabled()) return 0; - map = vcalloc(max_pages, sizeof(struct swap_cgroup)); + map = vzalloc(max_pages * sizeof(unsigned short)); if (!map) goto nomem; @@ -117,7 +110,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) void swap_cgroup_swapoff(int type) { - struct swap_cgroup *map; + void *map; struct swap_cgroup_ctrl *ctrl; if (mem_cgroup_disabled())