diff mbox series

[6/7] mm/list_lru: split the lock to per-cgroup scope

Message ID 20240624175313.47329-7-ryncsn@gmail.com (mailing list archive)
State New
Headers show
Series Split list_lru lock into per-cgroup scope | expand

Commit Message

Kairui Song June 24, 2024, 5:53 p.m. UTC
From: Kairui Song <kasong@tencent.com>

Currently, every list_lru has a per-node lock that protects adding,
deletion, isolation, and reparenting of all list_lru_one instances
belonging to this list_lru on this node. This lock contention is
heavy when multiple cgroups modify the same list_lru.

This lock can be split into per-cgroup scope to reduce contention.

To achieve this, we need a stable list_lru_one for every cgroup.
This commit adds a lock to each list_lru_one and introduced a
helper function lock_list_lru_of_memcg, making it possible to pin
the list_lru of a memcg. Then reworked the reparenting process.

Reparenting will switch the list_lru_one instances one by one.
By locking each instance and marking it dead using the nr_items
counter, reparenting ensures that all items in the corresponding
cgroup (on-list or not, because items have a stable cgroup, see below)
will see the list_lru_one switch synchronously.

Objcg reparent is also moved after list_lru reparent so items will have
a stable mem cgroup until all list_lru_one instances are drained.

The only caller that doesn't work the *_obj interfaces are direct
calls to list_lru_{add,del}. But it's only used by zswap and
that's also based on objcg, so it's fine.

This also changes the bahaviour of the isolation function when
LRU_RETRY or LRU_REMOVED_RETRY is returned, because now releasing the
lock could unblock reparenting and free the list_lru_one, isolation
function will have to return withoug re-lock the lru.

I see a 25% performance gain for SWAP after this change:

    modprobe zram
    echo 128G > /sys/block/zram0/disksize
    mkswap /dev/zram0; swapon /dev/zram0
    for i in $(seq 1 64); do
        mkdir /sys/fs/cgroup/benchmark-$i
        echo 64M > /sys/fs/cgroup/benchmark-$i/memory.max
    done
    do_test() {
        for i in $(seq 1 64); do
            cd /sys/fs/cgroup/benchmark-$i
            (exec sh -c 'echo "$PPID"') > cgroup.procs
            memhog 1G &
        done; wait
    }
    time do_test

Before:
real    0m20.328s user    0m4.315s sys     10m23.639s
real    0m20.440s user    0m4.142s sys     10m34.756s
real    0m20.381s user    0m4.164s sys     10m29.035s

After:
real    0m15.156s user    0m4.590s sys     7m34.361s
real    0m15.161s user    0m4.776s sys     7m35.086s
real    0m15.429s user    0m4.734s sys     7m42.919s

Similar performance gain with inode / dentry workload:

    prepare() {
        mkdir /tmp/test-fs
        modprobe brd rd_nr=1 rd_size=16777216
        mkfs.xfs -f /dev/ram0
        mount -t xfs /dev/ram0 /tmp/test-fs
        for i in $(seq 1 256); do
            mkdir "/tmp/test-fs/$i"
            for j in $(seq 1 10000); do
                echo TEST-CONTENT > "/tmp/test-fs/$i/$j"
            done
        done
    }

    do_test() {
        read_worker() {
            for j in $(seq 1 10000); do
                read -r __TMP < "/tmp/test-fs/$1/$j"
            done
        }
        read_in_all() {
            for i in $(seq 1 256); do
                (exec sh -c 'echo "$PPID"') > "/sys/fs/cgroup/benchmark/$i/cgroup.procs"
                read_worker "$i" &
            done; wait
        }
        echo 3 > /proc/sys/vm/drop_caches
        mkdir -p /sys/fs/cgroup/benchmark
        echo +memory > /sys/fs/cgroup/benchmark/cgroup.subtree_control
        echo 512M > /sys/fs/cgroup/benchmark/memory.max
        for i in $(seq 1 256); do
            rmdir "/sys/fs/cgroup/benchmark/$i" &>/dev/null
            mkdir -p "/sys/fs/cgroup/benchmark/$i"
        done
    }

Before this series:
real    0m26.939s user    0m36.322s sys     6m30.248s
real    0m15.111s user    0m33.749s sys     5m4.991s
real    0m16.796s user    0m33.438s sys     5m22.865s
real    0m15.256s user    0m34.060s sys     4m56.870s
real    0m14.826s user    0m33.531s sys     4m55.907s
real    0m15.664s user    0m35.619s sys     6m3.638s
real    0m15.746s user    0m34.066s sys     4m56.519s

After this commit (>10%):
real    0m22.166s user    0m35.155s sys     6m21.045s
real    0m13.753s user    0m34.554s sys     4m40.982s
real    0m13.815s user    0m34.693s sys     4m39.605s
real    0m13.495s user    0m34.372s sys     4m40.776s
real    0m13.895s user    0m34.005s sys     4m39.061s
real    0m13.629s user    0m33.476s sys     4m43.626s
real    0m14.001s user    0m33.463s sys     4m41.261s

Signed-off-by: Kairui Song <kasong@tencent.com>
---
 drivers/android/binder_alloc.c |   1 -
 fs/inode.c                     |   1 -
 fs/xfs/xfs_qm.c                |   1 -
 include/linux/list_lru.h       |   6 +-
 mm/list_lru.c                  | 224 +++++++++++++++++++--------------
 mm/memcontrol.c                |   7 +-
 mm/workingset.c                |   1 -
 mm/zswap.c                     |   5 +-
 8 files changed, 141 insertions(+), 105 deletions(-)
diff mbox series

Patch

diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 2e1f261ec5c8..dd47d621e561 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1106,7 +1106,6 @@  enum lru_status binder_alloc_free_page(struct list_head *item,
 	mmput_async(mm);
 	__free_page(page_to_free);
 
-	spin_lock(lock);
 	return LRU_REMOVED_RETRY;
 
 err_invalid_vma:
diff --git a/fs/inode.c b/fs/inode.c
index 3a41f83a4ba5..35da4e54e365 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -856,7 +856,6 @@  static enum lru_status inode_lru_isolate(struct list_head *item,
 			mm_account_reclaimed_pages(reap);
 		}
 		iput(inode);
-		spin_lock(lru_lock);
 		return LRU_RETRY;
 	}
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 47120b745c47..8d17099765ae 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -496,7 +496,6 @@  xfs_qm_dquot_isolate(
 	trace_xfs_dqreclaim_busy(dqp);
 	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
 	xfs_dqunlock(dqp);
-	spin_lock(lru_lock);
 	return LRU_RETRY;
 }
 
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 2e5132905f42..b84483ef93a7 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -32,6 +32,8 @@  struct list_lru_one {
 	struct list_head	list;
 	/* may become negative during memcg reparenting */
 	long			nr_items;
+	/* protects all fields above */
+	spinlock_t		lock;
 };
 
 struct list_lru_memcg {
@@ -41,11 +43,9 @@  struct list_lru_memcg {
 };
 
 struct list_lru_node {
-	/* protects all lists on the node, including per cgroup */
-	spinlock_t		lock;
 	/* global list, used for the root cgroup in cgroup aware lrus */
 	struct list_lru_one	lru;
-	long			nr_items;
+	atomic_long_t		nr_items;
 } ____cacheline_aligned_in_smp;
 
 struct list_lru {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index ac8aec8451dd..c503921cbb13 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -61,18 +61,48 @@  list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
 }
 
 static inline struct list_lru_one *
-list_lru_from_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg)
+lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+		       bool irq, bool skip_empty)
 {
 	struct list_lru_one *l;
+	rcu_read_lock();
 again:
 	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
-	if (likely(l))
-		return l;
-
-	memcg = parent_mem_cgroup(memcg);
+	if (likely(l)) {
+		if (irq)
+			spin_lock_irq(&l->lock);
+		else
+			spin_lock(&l->lock);
+		if (likely(READ_ONCE(l->nr_items) != LONG_MIN)) {
+			WARN_ON(l->nr_items < 0);
+			rcu_read_unlock();
+			return l;
+		}
+		if (irq)
+			spin_unlock_irq(&l->lock);
+		else
+			spin_unlock(&l->lock);
+	}
+	/*
+	 * Caller may simply bail out if raced with reparenting or
+	 * may iterate through the list_lru and expect empty slots.
+	 */
+	if (skip_empty) {
+		rcu_read_unlock();
+		return NULL;
+	}
 	WARN_ON(!css_is_dying(&memcg->css));
+	memcg = parent_mem_cgroup(memcg);
 	goto again;
 }
+
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+{
+	if (irq_off)
+		spin_unlock_irq(&l->lock);
+	else
+		spin_unlock(&l->lock);
+}
 #else
 static void list_lru_register(struct list_lru *lru)
 {
@@ -99,30 +129,47 @@  list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
 }
 
 static inline struct list_lru_one *
-list_lru_from_memcg(struct list_lru *lru, int nid, int idx)
+lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
+		       bool irq, bool skip_empty)
 {
-	return &lru->node[nid].lru;
+	struct list_lru_one *l = &lru->node[nid].lru;
+
+	if (irq)
+		spin_lock_irq(&l->lock);
+	else
+		spin_lock(&l->lock);
+
+	return l;
+}
+
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+{
+	if (irq_off)
+		spin_unlock_irq(&l->lock);
+	else
+		spin_unlock(&l->lock);
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
 bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
-		    struct mem_cgroup *memcg)
+		  struct mem_cgroup *memcg)
 {
 	struct list_lru_node *nlru = &lru->node[nid];
 	struct list_lru_one *l;
 
-	spin_lock(&nlru->lock);
+	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+	if (!l)
+		return false;
 	if (list_empty(item)) {
-		l = list_lru_from_memcg(lru, nid, memcg);
 		list_add_tail(item, &l->list);
 		/* Set shrinker bit if the first element was added */
 		if (!l->nr_items++)
 			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
-		nlru->nr_items++;
-		spin_unlock(&nlru->lock);
+		unlock_list_lru(l, false);
+		atomic_long_inc(&nlru->nr_items);
 		return true;
 	}
-	spin_unlock(&nlru->lock);
+	unlock_list_lru(l, false);
 	return false;
 }
 
@@ -137,24 +184,23 @@  bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
 EXPORT_SYMBOL_GPL(list_lru_add_obj);
 
 bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
-		    struct mem_cgroup *memcg)
+		  struct mem_cgroup *memcg)
 {
 	struct list_lru_node *nlru = &lru->node[nid];
 	struct list_lru_one *l;
-
-	spin_lock(&nlru->lock);
+	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+	if (!l)
+		return false;
 	if (!list_empty(item)) {
-		l = list_lru_from_memcg(lru, nid, memcg);
 		list_del_init(item);
 		l->nr_items--;
-		nlru->nr_items--;
-		spin_unlock(&nlru->lock);
+		unlock_list_lru(l, false);
+		atomic_long_dec(&nlru->nr_items);
 		return true;
 	}
-	spin_unlock(&nlru->lock);
+	unlock_list_lru(l, false);
 	return false;
 }
-EXPORT_SYMBOL_GPL(list_lru_del);
 
 bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
 {
@@ -204,25 +250,24 @@  unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 	struct list_lru_node *nlru;
 
 	nlru = &lru->node[nid];
-	return nlru->nr_items;
+	return atomic_long_read(&nlru->nr_items);
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
 static unsigned long
-__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
+__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 		    list_lru_walk_cb isolate, void *cb_arg,
-		    unsigned long *nr_to_walk)
+		    unsigned long *nr_to_walk, bool irq_off)
 {
 	struct list_lru_node *nlru = &lru->node[nid];
-	struct list_lru_one *l;
+	struct list_lru_one *l = NULL;
 	struct list_head *item, *n;
 	unsigned long isolated = 0;
 
 restart:
-	l = list_lru_from_memcg_idx(lru, nid, memcg_idx);
+	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
 	if (!l)
-		goto out;
-
+		return isolated;
 	list_for_each_safe(item, n, &l->list) {
 		enum lru_status ret;
 
@@ -234,19 +279,20 @@  __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 			break;
 		--*nr_to_walk;
 
-		ret = isolate(item, l, &nlru->lock, cb_arg);
+		ret = isolate(item, l, &l->lock, cb_arg);
 		switch (ret) {
+		/*
+		 * LRU_RETRY and LRU_REMOVED_RETRY will drop the lru lock,
+		 * the list traversal will be invalid and have to restart from
+		 * scratch.
+		 */
+		case LRU_RETRY:
+			goto restart;
 		case LRU_REMOVED_RETRY:
-			assert_spin_locked(&nlru->lock);
 			fallthrough;
 		case LRU_REMOVED:
 			isolated++;
-			nlru->nr_items--;
-			/*
-			 * If the lru lock has been dropped, our list
-			 * traversal is now invalid and so we have to
-			 * restart from scratch.
-			 */
+			atomic_long_dec(&nlru->nr_items);
 			if (ret == LRU_REMOVED_RETRY)
 				goto restart;
 			break;
@@ -255,21 +301,15 @@  __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 			break;
 		case LRU_SKIP:
 			break;
-		case LRU_RETRY:
-			/*
-			 * The lru lock has been dropped, our list traversal is
-			 * now invalid and so we have to restart from scratch.
-			 */
-			assert_spin_locked(&nlru->lock);
-			goto restart;
 		case LRU_STOP:
-			assert_spin_locked(&nlru->lock);
+			assert_spin_locked(&l->lock);
 			goto out;
 		default:
 			BUG();
 		}
 	}
 out:
+	unlock_list_lru(l, irq_off);
 	return isolated;
 }
 
@@ -278,14 +318,8 @@  list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 		  list_lru_walk_cb isolate, void *cb_arg,
 		  unsigned long *nr_to_walk)
 {
-	struct list_lru_node *nlru = &lru->node[nid];
-	unsigned long ret;
-
-	spin_lock(&nlru->lock);
-	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
-				  cb_arg, nr_to_walk);
-	spin_unlock(&nlru->lock);
-	return ret;
+	return __list_lru_walk_one(lru, nid, memcg, isolate,
+				   cb_arg, nr_to_walk, false);
 }
 EXPORT_SYMBOL_GPL(list_lru_walk_one);
 
@@ -294,14 +328,8 @@  list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 		      list_lru_walk_cb isolate, void *cb_arg,
 		      unsigned long *nr_to_walk)
 {
-	struct list_lru_node *nlru = &lru->node[nid];
-	unsigned long ret;
-
-	spin_lock_irq(&nlru->lock);
-	ret = __list_lru_walk_one(lru, nid, memcg_kmem_id(memcg), isolate,
-				  cb_arg, nr_to_walk);
-	spin_unlock_irq(&nlru->lock);
-	return ret;
+	return __list_lru_walk_one(lru, nid, memcg, isolate,
+				   cb_arg, nr_to_walk, true);
 }
 
 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
@@ -316,16 +344,21 @@  unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 #ifdef CONFIG_MEMCG_KMEM
 	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
 		struct list_lru_memcg *mlru;
+		struct mem_cgroup *memcg;
 		unsigned long index;
 
 		xa_for_each(&lru->xa, index, mlru) {
-			struct list_lru_node *nlru = &lru->node[nid];
-
-			spin_lock(&nlru->lock);
-			isolated += __list_lru_walk_one(lru, nid, index,
+			rcu_read_lock();
+			memcg = mem_cgroup_from_id(index);
+			if (!mem_cgroup_tryget(memcg)) {
+				rcu_read_unlock();
+				continue;
+			}
+			rcu_read_unlock();
+			isolated += __list_lru_walk_one(lru, nid, memcg,
 							isolate, cb_arg,
-							nr_to_walk);
-			spin_unlock(&nlru->lock);
+							nr_to_walk, false);
+			mem_cgroup_put(memcg);
 
 			if (*nr_to_walk <= 0)
 				break;
@@ -337,14 +370,19 @@  unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 }
 EXPORT_SYMBOL_GPL(list_lru_walk_node);
 
-static void init_one_lru(struct list_lru_one *l)
+static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
 {
 	INIT_LIST_HEAD(&l->list);
+	spin_lock_init(&l->lock);
 	l->nr_items = 0;
+#ifdef CONFIG_LOCKDEP
+	if (lru->key)
+		lockdep_set_class(&l->lock, lru->key);
+#endif
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
+static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
 {
 	int nid;
 	struct list_lru_memcg *mlru;
@@ -354,7 +392,7 @@  static struct list_lru_memcg *memcg_init_list_lru_one(gfp_t gfp)
 		return NULL;
 
 	for_each_node(nid)
-		init_one_lru(&mlru->node[nid]);
+		init_one_lru(lru, &mlru->node[nid]);
 
 	return mlru;
 }
@@ -382,28 +420,27 @@  static void memcg_destroy_list_lru(struct list_lru *lru)
 	xas_unlock_irq(&xas);
 }
 
-static void memcg_reparent_list_lru_node(struct list_lru *lru, int nid,
-					 struct list_lru_one *src,
-					 struct mem_cgroup *dst_memcg)
+static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
+					struct list_lru_one *src,
+					struct mem_cgroup *dst_memcg)
 {
-	struct list_lru_node *nlru = &lru->node[nid];
+	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *dst;
 
-	/*
-	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
-	 * we have to use IRQ-safe primitives here to avoid deadlock.
-	 */
-	spin_lock_irq(&nlru->lock);
-	dst = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(dst_memcg));
+	spin_lock_irq(&src->lock);
+	dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
+	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
 
 	list_splice_init(&src->list, &dst->list);
-
 	if (src->nr_items) {
 		dst->nr_items += src->nr_items;
 		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
-		src->nr_items = 0;
 	}
-	spin_unlock_irq(&nlru->lock);
+	/* Mark the list_lru_one dead */
+	src->nr_items = LONG_MIN;
+
+	spin_unlock(&dst->lock);
+	spin_unlock_irq(&src->lock);
 }
 
 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
@@ -422,8 +459,6 @@  void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
 		 */
 		xas_lock_irq(&xas);
 		mlru = xas_load(&xas);
-		if (mlru)
-			xas_store(&xas, NULL);
 		xas_unlock_irq(&xas);
 		if (!mlru)
 			continue;
@@ -434,13 +469,20 @@  void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *paren
 		 * safe to reparent.
 		 */
 		for_each_node(i)
-			memcg_reparent_list_lru_node(lru, i, &mlru->node[i], parent);
+			memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);
 
 		/*
 		 * Here all list_lrus corresponding to the cgroup are guaranteed
 		 * to remain empty, we can safely free this lru, any further
 		 * memcg_list_lru_alloc() call will simply bail out.
+		 *
+		 * To ensure callers see a stable list_lru_one, have to set it
+		 * to NULL after memcg_reparent_list_lru_one().
 		 */
+		xas_lock_irq(&xas);
+		xas_reset(&xas);
+		xas_store(&xas, NULL);
+		xas_unlock_irq(&xas);
 		kvfree_rcu(mlru, rcu);
 	}
 	mutex_unlock(&list_lrus_mutex);
@@ -483,7 +525,7 @@  int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
 			parent = parent_mem_cgroup(pos);
 		}
 
-		mlru = memcg_init_list_lru_one(gfp);
+		mlru = memcg_init_list_lru_one(lru, gfp);
 		do {
 			bool alloced = false;
 
@@ -532,14 +574,8 @@  int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shr
 	if (!lru->node)
 		return -ENOMEM;
 
-	for_each_node(i) {
-		spin_lock_init(&lru->node[i].lock);
-#ifdef CONFIG_LOCKDEP
-		if (lru->key)
-			lockdep_set_class(&lru->node[i].lock, lru->key);
-#endif
-		init_one_lru(&lru->node[i].lru);
-	}
+	for_each_node(i)
+		init_one_lru(lru, &lru->node[i].lru);
 
 	memcg_init_list_lru(lru, memcg_aware);
 	list_lru_register(lru);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc35c1d3f109..945290a53bf1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4186,8 +4186,13 @@  static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	if (!parent)
 		parent = root_mem_cgroup;
 
-	memcg_reparent_objcgs(memcg, parent);
 	memcg_reparent_list_lrus(memcg, parent);
+
+	/*
+	 * Objcg's reparenting must be after list_lru's, make sure list_lru
+	 * helpers won't use parent's list_lru until child is drained.
+	 */
+	memcg_reparent_objcgs(memcg, parent);
 }
 #else
 static int memcg_online_kmem(struct mem_cgroup *memcg)
diff --git a/mm/workingset.c b/mm/workingset.c
index 1801fbe5183c..947423c3e719 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -769,7 +769,6 @@  static enum lru_status shadow_lru_isolate(struct list_head *item,
 	ret = LRU_REMOVED_RETRY;
 out:
 	cond_resched();
-	spin_lock_irq(lru_lock);
 	return ret;
 }
 
diff --git a/mm/zswap.c b/mm/zswap.c
index c6e2256347ff..f7a2afaeea53 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -720,9 +720,9 @@  static void zswap_lru_add(struct list_lru *list_lru, struct zswap_entry *entry)
 	 * Note that it is safe to use rcu_read_lock() here, even in the face of
 	 * concurrent memcg offlining:
 	 *
-	 * 1. list_lru_add() is called before list_lru_memcg is erased. The
+	 * 1. list_lru_add() is called before list_lru_one is dead. The
 	 *    new entry will be reparented to memcg's parent's list_lru.
-	 * 2. list_lru_add() is called after list_lru_memcg is erased. The
+	 * 2. list_lru_add() is called after list_lru_one is dead. The
 	 *    new entry will be added directly to memcg's parent's list_lru.
 	 *
 	 * Similar reasoning holds for list_lru_del().
@@ -1164,7 +1164,6 @@  static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
 		zswap_written_back_pages++;
 	}
 
-	spin_lock(lock);
 	return ret;
 }