diff mbox series

[mm-unstable,v2,4/5] mm: restart if multiple traversals raced

Message ID 20240813204716.842811-5-kinseyho@google.com (mailing list archive)
State New
Headers show
Series Improve mem_cgroup_iter() | expand

Commit Message

Kinsey Ho Aug. 13, 2024, 8:47 p.m. UTC
Currently, if multiple reclaimers race on the same position, the
reclaimers that detect the race still reclaim from the same memcg.
Instead, the reclaimers that detect the race should move on to the next
memcg in the hierarchy.

So, in the case where multiple traversals race, jump back to the start
of the mem_cgroup_iter() function to find the next memcg in the
hierarchy to reclaim from.

Signed-off-by: Kinsey Ho <kinseyho@google.com>
---
 include/linux/memcontrol.h |  4 ++--
 mm/memcontrol.c            | 22 ++++++++++++++--------
 2 files changed, 16 insertions(+), 10 deletions(-)

Comments

Michal Koutný Aug. 14, 2024, 9 a.m. UTC | #1
On Tue, Aug 13, 2024 at 08:47:14PM GMT, Kinsey Ho <kinseyho@google.com> wrote:
> @@ -1072,21 +1073,26 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
>  		 * and kicking, and don't take an extra reference.
>  		 */
>  		if (css == &root->css || css_tryget(css)) {
> -			memcg = mem_cgroup_from_css(css);
>  			break;
>  		}
>  	}
>  
> +	memcg = mem_cgroup_from_css(css);
> +
>  	if (reclaim) {
>  		/*
>  		 * The position could have already been updated by a competing
>  		 * thread, so check that the value hasn't changed since we read
>  		 * it to avoid reclaiming from the same cgroup twice.
>  		 */
> -		(void)cmpxchg(&iter->position, pos, memcg);
> +		if (cmpxchg(&iter->position, pos, memcg) != pos) {
> +			if (css && css != &root->css)
> +				css_put(css);
> +			goto restart;
> +		}

I may be missing (literal) context but I'd suggest not moving the memcg
assignment and leverage
	if (memcg != NULL)
		css_put(memcg->css)
so that the is-root comparison needn't be repeated.

Thanks,
Michal
Kinsey Ho Aug. 16, 2024, 4:27 p.m. UTC | #2
Hi Michal,

> I may be missing (literal) context but I'd suggest not moving the memcg
> assignment and leverage
>         if (memcg != NULL)
>                 css_put(memcg->css)
> so that the is-root comparison needn't be repeated.

I might also be misunderstanding you with respect to the is-root
comparison – the reason the memcg assignment is moved is because it is
possible that on the restart added in this patch, css could be NULL.
In that case, memcg won't be assigned and could be left with a
previous, invalid value. By moving the assignment out, it ensures that
memcg is a valid value.

Best,
Kinsey
Michal Koutný Aug. 20, 2024, 11:59 a.m. UTC | #3
On Fri, Aug 16, 2024 at 12:27:27PM GMT, Kinsey Ho <kinseyho@google.com> wrote:
> Hi Michal,
> 
> > I may be missing (literal) context but I'd suggest not moving the memcg
> > assignment and leverage
> >         if (memcg != NULL)
> >                 css_put(memcg->css)
> > so that the is-root comparison needn't be repeated.
> 
> I might also be misunderstanding you with respect to the is-root
> comparison – the reason the memcg assignment is moved is because it is
> possible that on the restart added in this patch, css could be NULL.

I've played with this series applied up to patch 4/5, and I see that
more changes would be needed to preserve the function's behavior.
Please disregard my initial suggestion ':-)

Michal
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1aaed2f1f6ae..aada9ef3ca44 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -57,7 +57,7 @@  enum memcg_memory_event {
 
 struct mem_cgroup_reclaim_cookie {
 	pg_data_t *pgdat;
-	unsigned int generation;
+	int generation;
 };
 
 #ifdef CONFIG_MEMCG
@@ -77,7 +77,7 @@  struct lruvec_stats;
 struct mem_cgroup_reclaim_iter {
 	struct mem_cgroup __rcu *position;
 	/* scan generation, increased every round-trip */
-	unsigned int generation;
+	atomic_t generation;
 };
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 937b7efc41ca..84de46ece9a9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1025,7 +1025,7 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
 	struct mem_cgroup_reclaim_iter *iter;
-	struct cgroup_subsys_state *css = NULL;
+	struct cgroup_subsys_state *css;
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *pos = NULL;
 
@@ -1038,18 +1038,20 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	rcu_read_lock();
 restart:
 	if (reclaim) {
+		int gen;
 		struct mem_cgroup_per_node *mz;
 
 		mz = root->nodeinfo[reclaim->pgdat->node_id];
 		iter = &mz->iter;
+		gen = atomic_read(&iter->generation);
 
 		/*
 		 * On start, join the current reclaim iteration cycle.
 		 * Exit when a concurrent walker completes it.
 		 */
 		if (!prev)
-			reclaim->generation = iter->generation;
-		else if (reclaim->generation != iter->generation)
+			reclaim->generation = gen;
+		else if (reclaim->generation != gen)
 			goto out_unlock;
 
 		pos = rcu_dereference(iter->position);
@@ -1057,8 +1059,7 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		pos = prev;
 	}
 
-	if (pos)
-		css = &pos->css;
+	css = pos ? &pos->css : NULL;
 
 	for (;;) {
 		css = css_next_descendant_pre(css, &root->css);
@@ -1072,21 +1073,26 @@  struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		 * and kicking, and don't take an extra reference.
 		 */
 		if (css == &root->css || css_tryget(css)) {
-			memcg = mem_cgroup_from_css(css);
 			break;
 		}
 	}
 
+	memcg = mem_cgroup_from_css(css);
+
 	if (reclaim) {
 		/*
 		 * The position could have already been updated by a competing
 		 * thread, so check that the value hasn't changed since we read
 		 * it to avoid reclaiming from the same cgroup twice.
 		 */
-		(void)cmpxchg(&iter->position, pos, memcg);
+		if (cmpxchg(&iter->position, pos, memcg) != pos) {
+			if (css && css != &root->css)
+				css_put(css);
+			goto restart;
+		}
 
 		if (!memcg) {
-			iter->generation++;
+			atomic_inc(&iter->generation);
 
 			/*
 			 * Reclaimers share the hierarchy walk, and a