[3/3] mm: list_lru: Add lock_irq member to __list_lru_init()

Message ID 20180622151221.28167-4-bigeasy@linutronix.de (mailing list archive)
State New, archived

Commit Message

Sebastian Andrzej Siewior June 22, 2018, 3:12 p.m. UTC
scan_shadow_nodes() is the only user of __list_lru_walk_one() which
disables interrupts before invoking it. The reason is that nlru->lock
nests inside the IRQ-safe i_pages lock. Some functions unconditionally
acquire the lock with the _irq() suffix.

__list_lru_walk_one() can't acquire the lock unconditionally with the
_irq() suffix because it might invoke a callback which unlocks the
nlru->lock and invokes a sleeping function without enabling interrupts.
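
A hypothetical callback (the names here are invented for the example,
following the list_lru_walk_cb signature) shows the pattern that rules
out an unconditional _irq() suffix:

/*
 * Sketch only: if __list_lru_walk_one() always took nlru->lock with
 * spin_lock_irq(), a callback written like this would end up calling
 * a sleeping function with interrupts still disabled.
 */
static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list,
				  spinlock_t *lock, void *cb_arg)
{
	struct mutex *mtx = cb_arg;

	/* Drops nlru->lock with the plain suffix: IRQs stay disabled. */
	spin_unlock(lock);
	/* mutex_lock() may sleep; sleeping with IRQs off is a bug. */
	mutex_lock(mtx);
	mutex_unlock(mtx);
	/* Re-take nlru->lock and tell the walker to restart the walk. */
	spin_lock(lock);
	return LRU_RETRY;
}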

Add an argument to __list_lru_init() which indicates whether nlru->lock
needs to be acquired with interrupts disabled or not.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/list_lru.h | 12 ++++++++----
 mm/list_lru.c            | 14 ++++++++++----
 mm/workingset.c          | 12 ++++--------
 3 files changed, 22 insertions(+), 16 deletions(-)

Comments

Vladimir Davydov June 24, 2018, 8:09 p.m. UTC | #1
On Fri, Jun 22, 2018 at 05:12:21PM +0200, Sebastian Andrzej Siewior wrote:
> scan_shadow_nodes() is the only user of __list_lru_walk_one() which
> disables interrupts before invoking it. The reason is that nlru->lock
> nests inside the IRQ-safe i_pages lock. Some functions unconditionally
> acquire the lock with the _irq() suffix.
> 
> __list_lru_walk_one() can't acquire the lock unconditionally with the
> _irq() suffix because it might invoke a callback which unlocks the
> nlru->lock and invokes a sleeping function without enabling interrupts.
> 
> Add an argument to __list_lru_init() which indicates whether nlru->lock
> needs to be acquired with interrupts disabled or not.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  include/linux/list_lru.h | 12 ++++++++----
>  mm/list_lru.c            | 14 ++++++++++----
>  mm/workingset.c          | 12 ++++--------
>  3 files changed, 22 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index 96def9d15b1b..c2161c3a1809 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -51,18 +51,22 @@ struct list_lru_node {
>  
>  struct list_lru {
>  	struct list_lru_node	*node;
> +	bool			lock_irq;
>  #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
>  	struct list_head	list;
>  #endif
>  };

TBH I don't like this patch, because the new member of struct list_lru,
lock_irq, has a rather obscure meaning IMHO: it makes list_lru_walk
disable irqs before taking lru_lock, but at the same time list_lru_add
and list_lru_del never do that, no matter whether lock_irq is true or
false. That is, if a user of struct list_lru sets this flag, he's
supposed to disable irqs for list_lru_add/del by himself (mm/workingset
does that). IMHO the code of mm/workingset is clear as it is. Since it
is the only place where this flag is used, I'd rather leave it as is.
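
For reference, the add/del path in question looks roughly like this
(simplified from mm/workingset.c of that time, not part of this patch):

/*
 * Simplified from mm/workingset.c: callers hold the IRQ-safe i_pages
 * lock, so interrupts are already disabled when the shadow-node LRU is
 * updated. list_lru_add()/list_lru_del() take nlru->lock with plain
 * spin_lock() regardless of the new lock_irq flag.
 */
void workingset_update_node(struct radix_tree_node *node)
{
	if (node->count && node->count == node->exceptional) {
		if (list_empty(&node->private_list))
			list_lru_add(&shadow_nodes, &node->private_list);
	} else {
		if (!list_empty(&node->private_list))
			list_lru_del(&shadow_nodes, &node->private_list);
	}
}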

>  
>  void list_lru_destroy(struct list_lru *lru);
> -int __list_lru_init(struct list_lru *lru, bool memcg_aware,
> +int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
>  		    struct lock_class_key *key);
>  
> -#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
> -#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
> -#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)
> +#define list_lru_init(lru)		__list_lru_init((lru), false, false, \
> +							NULL)
> +#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, false, \
> +							(key))
> +#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, false, \
> +							NULL)
>  
>  int memcg_update_all_list_lrus(int num_memcgs);
>  void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
> diff --git a/mm/list_lru.c b/mm/list_lru.c
> index fcfb6c89ed47..1c49d48078e4 100644
> --- a/mm/list_lru.c
> +++ b/mm/list_lru.c
> @@ -204,7 +204,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
>  	struct list_head *item, *n;
>  	unsigned long isolated = 0;
>  
> -	spin_lock(&nlru->lock);
> +	if (lru->lock_irq)
> +		spin_lock_irq(&nlru->lock);
> +	else
> +		spin_lock(&nlru->lock);
>  	l = list_lru_from_memcg_idx(nlru, memcg_idx);
>  restart:
>  	list_for_each_safe(item, n, &l->list) {
> @@ -251,7 +254,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
>  		}
>  	}
>  
> -	spin_unlock(&nlru->lock);
> +	if (lru->lock_irq)
> +		spin_unlock_irq(&nlru->lock);
> +	else
> +		spin_unlock(&nlru->lock);
>  	return isolated;
>  }
>  
> @@ -553,7 +559,7 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
>  }
>  #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
>  
> -int __list_lru_init(struct list_lru *lru, bool memcg_aware,
> +int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
>  		    struct lock_class_key *key)
>  {
>  	int i;
> @@ -580,7 +586,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
>  		lru->node = NULL;
>  		goto out;
>  	}
> -
> +	lru->lock_irq = lock_irq;
>  	list_lru_register(lru);
>  out:
>  	memcg_put_cache_ids();
> diff --git a/mm/workingset.c b/mm/workingset.c
> index 529480c21f93..23ce00f48212 100644
> --- a/mm/workingset.c
> +++ b/mm/workingset.c
> @@ -480,13 +480,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
>  static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
>  				       struct shrink_control *sc)
>  {
> -	unsigned long ret;
> -
> -	/* list_lru lock nests inside the IRQ-safe i_pages lock */
> -	local_irq_disable();
> -	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
> -	local_irq_enable();
> -	return ret;
> +	return list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate,
> +				    NULL);
>  }
>  
>  static struct shrinker workingset_shadow_shrinker = {
> @@ -523,7 +518,8 @@ static int __init workingset_init(void)
>  	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
>  	       timestamp_bits, max_order, bucket_order);
>  
> -	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
> +	/* list_lru lock nests inside the IRQ-safe i_pages lock */
> +	ret = __list_lru_init(&shadow_nodes, true, true, &shadow_nodes_key);
>  	if (ret)
>  		goto err;
>  	ret = register_shrinker(&workingset_shadow_shrinker);
Sebastian Andrzej Siewior July 3, 2018, 2:52 p.m. UTC | #2
My interpretation of the situation is that Vladimir Davydov is fine with
patches #1 and #2 of the series [0] but dislikes the irq argument and
struct member. It has been suggested to use list_lru_shrink_walk_irq()
instead of the approach I took in "mm: list_lru: Add lock_irq member to
__list_lru_init()".

This series is based on the former two patches and introduces
list_lru_shrink_walk_irq() (and makes the third patch of the series
obsolete). In patches 1-3 I tried a tiny cleanup so that the different
locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
of the function.
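
A minimal sketch of what that could look like (the exact form in the
posted series may differ; a list_lru_walk_one_irq() helper that takes
nlru->lock with spin_lock_irq() is assumed here):

/*
 * Sketch only: a dedicated _irq entry point, so the caller rather than
 * a struct list_lru member decides how nlru->lock is taken.
 */
static inline unsigned long
list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc,
			 list_lru_walk_cb isolate, void *cb_arg)
{
	return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate,
				     cb_arg, &sc->nr_to_scan);
}

/* scan_shadow_nodes() then drops its local_irq_disable() entirely: */
static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
				       struct shrink_control *sc)
{
	return list_lru_shrink_walk_irq(&shadow_nodes, sc,
					shadow_lru_isolate, NULL);
}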

[0] The patches
      mm: workingset: remove local_irq_disable() from count_shadow_nodes()
   and
      mm: workingset: make shadow_lru_isolate() use locking suffix
Sebastian
Andrew Morton July 3, 2018, 9:14 p.m. UTC | #3
> Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()

Well that's messed up.

On Tue,  3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:

> My interpretation of the situation is that Vladimir Davydov is fine with
> patches #1 and #2 of the series [0] but dislikes the irq argument and
> struct member. It has been suggested to use list_lru_shrink_walk_irq()
> instead of the approach I took in "mm: list_lru: Add lock_irq member to
> __list_lru_init()".
> 
> This series is based on the former two patches and introduces
> list_lru_shrink_walk_irq() (and makes the third patch of the series
> obsolete). In patches 1-3 I tried a tiny cleanup so that the different
> locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> of the function.
> 
> [0] The patches
>       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
>    and
>       mm: workingset: make shadow_lru_isolate() use locking suffix
> 

This isn't a very informative [0/n] changelog.  Some overall summary of
the patchset's objective, behaviour, use cases, testing results, etc.
would be appreciated.

I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
scalability (old complexity was O(n^2), new is O(n))" series, which I
merged eight milliseconds ago.  Kirill's patchset is large but fairly
straightforward so I expect it's good for 4.18.  So I suggest we leave
things for a week or more, then please take a look at redoing this
patchset on top of that work?
Sebastian Andrzej Siewior July 3, 2018, 9:44 p.m. UTC | #4
On 2018-07-03 14:14:29 [-0700], Andrew Morton wrote:
> 
> > Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()
> 
> Well that's messed up.

indeed it is. That should have gone into the Subject: line.

> On Tue,  3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> 
> > My interpretation of the situation is that Vladimir Davydov is fine with
> > patches #1 and #2 of the series [0] but dislikes the irq argument and
> > struct member. It has been suggested to use list_lru_shrink_walk_irq()
> > instead of the approach I took in "mm: list_lru: Add lock_irq member to
> > __list_lru_init()".
> > 
> > This series is based on the former two patches and introduces
> > list_lru_shrink_walk_irq() (and makes the third patch of the series
> > obsolete). In patches 1-3 I tried a tiny cleanup so that the different
> > locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> > of the function.
> > 
> > [0] The patches
> >       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
> >    and
> >       mm: workingset: make shadow_lru_isolate() use locking suffix
> > 
> 
> This isn't a very informative [0/n] changelog.  Some overall summary of
> the patchset's objective, behaviour, use cases, testing results, etc.
> would be appreciated.

The patches should be threaded as a reply to 3/3 of the series, so I
assumed that was enough. While Vladimir complained about 2/3 and 3/3,
the discussion went on in 2/3, where he suggested going with the _irq
function. As for testing: with and without RT, the function was invoked
as part of swapping (allocating memory until OOM) without complaints.

> I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
> scalability (old complexity was O(n^2), new is O(n))" series, which I
> merged eight milliseconds ago.  Kirill's patchset is large but fairly
> straightforward so I expect it's good for 4.18.  So I suggest we leave
> things for a week or more, then please take a look at redoing this
> patchset on top of that work?

If Vladimir is okay with the redo and nobody else complains, then I
could rebase these four patches on top of your tree next week.

Sebastian
Vladimir Davydov July 4, 2018, 2:44 p.m. UTC | #5
On Tue, Jul 03, 2018 at 11:44:29PM +0200, Sebastian Andrzej Siewior wrote:
> On 2018-07-03 14:14:29 [-0700], Andrew Morton wrote:
> > 
> > > Reply-To: "[PATCH 0/4] mm/list_lru": add.list_lru_shrink_walk_irq@mail.linuxfoundation.org.and.use.it ()
> > 
> > Well that's messed up.
> 
> indeed it is. That should have gone into the Subject: line.
> 
> > On Tue,  3 Jul 2018 16:52:31 +0200 Sebastian Andrzej Siewior <bigeasy@linutronix.de> wrote:
> > 
> > > My interpretation of the situation is that Vladimir Davydov is fine with
> > > patches #1 and #2 of the series [0] but dislikes the irq argument and
> > > struct member. It has been suggested to use list_lru_shrink_walk_irq()
> > > instead of the approach I took in "mm: list_lru: Add lock_irq member to
> > > __list_lru_init()".
> > > 
> > > This series is based on the former two patches and introduces
> > > list_lru_shrink_walk_irq() (and makes the third patch of the series
> > > obsolete). In patches 1-3 I tried a tiny cleanup so that the different
> > > locking (spin_lock() vs spin_lock_irq()) is simply lifted to the caller
> > > of the function.
> > > 
> > > [0] The patches
> > >       mm: workingset: remove local_irq_disable() from count_shadow_nodes()
> > >    and
> > >       mm: workingset: make shadow_lru_isolate() use locking suffix
> > > 
> > 
> > This isn't a very informative [0/n] changelog.  Some overall summary of
> > the patchset's objective, behaviour, use cases, testing results, etc.
> > would be appreciated.
> 
> The patches should be threaded as a reply to 3/3 of the series, so I
> assumed that was enough. While Vladimir complained about 2/3 and 3/3,
> the discussion went on in 2/3, where he suggested going with the _irq
> function. As for testing: with and without RT, the function was invoked
> as part of swapping (allocating memory until OOM) without complaints.
> 
> > I'm seeing significant conflicts with Kirill's "Improve shrink_slab()
> > scalability (old complexity was O(n^2), new is O(n))" series, which I
> > merged eight milliseconds ago.  Kirill's patchset is large but fairly
> > straightforward so I expect it's good for 4.18.  So I suggest we leave
> > things for a week or more, then please take a look at redoing this
> > patchset on top of that work?
> 
> If Vladimir is okay with the redo and nobody else complains, then I
> could rebase these four patches on top of your tree next week.

IMHO this approach is more straightforward than the one with the
per-list_lru flag. For all patches,

Reviewed-by: Vladimir Davydov <vdavydov.dev@gmail.com>

Patch

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 96def9d15b1b..c2161c3a1809 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -51,18 +51,22 @@ struct list_lru_node {
 
 struct list_lru {
 	struct list_lru_node	*node;
+	bool			lock_irq;
 #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
 	struct list_head	list;
 #endif
 };
 
 void list_lru_destroy(struct list_lru *lru);
-int __list_lru_init(struct list_lru *lru, bool memcg_aware,
+int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
 		    struct lock_class_key *key);
 
-#define list_lru_init(lru)		__list_lru_init((lru), false, NULL)
-#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, (key))
-#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, NULL)
+#define list_lru_init(lru)		__list_lru_init((lru), false, false, \
+							NULL)
+#define list_lru_init_key(lru, key)	__list_lru_init((lru), false, false, \
+							(key))
+#define list_lru_init_memcg(lru)	__list_lru_init((lru), true, false, \
+							NULL)
 
 int memcg_update_all_list_lrus(int num_memcgs);
 void memcg_drain_all_list_lrus(int src_idx, int dst_idx);
diff --git a/mm/list_lru.c b/mm/list_lru.c
index fcfb6c89ed47..1c49d48078e4 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -204,7 +204,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 	struct list_head *item, *n;
 	unsigned long isolated = 0;
 
-	spin_lock(&nlru->lock);
+	if (lru->lock_irq)
+		spin_lock_irq(&nlru->lock);
+	else
+		spin_lock(&nlru->lock);
 	l = list_lru_from_memcg_idx(nlru, memcg_idx);
 restart:
 	list_for_each_safe(item, n, &l->list) {
@@ -251,7 +254,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
 		}
 	}
 
-	spin_unlock(&nlru->lock);
+	if (lru->lock_irq)
+		spin_unlock_irq(&nlru->lock);
+	else
+		spin_unlock(&nlru->lock);
 	return isolated;
 }
 
@@ -553,7 +559,7 @@ static void memcg_destroy_list_lru(struct list_lru *lru)
 }
 #endif /* CONFIG_MEMCG && !CONFIG_SLOB */
 
-int __list_lru_init(struct list_lru *lru, bool memcg_aware,
+int __list_lru_init(struct list_lru *lru, bool memcg_aware, bool lock_irq,
 		    struct lock_class_key *key)
 {
 	int i;
@@ -580,7 +586,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 		lru->node = NULL;
 		goto out;
 	}
-
+	lru->lock_irq = lock_irq;
 	list_lru_register(lru);
 out:
 	memcg_put_cache_ids();
diff --git a/mm/workingset.c b/mm/workingset.c
index 529480c21f93..23ce00f48212 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -480,13 +480,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 				       struct shrink_control *sc)
 {
-	unsigned long ret;
-
-	/* list_lru lock nests inside the IRQ-safe i_pages lock */
-	local_irq_disable();
-	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
-	local_irq_enable();
-	return ret;
+	return list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate,
+				    NULL);
 }
 
 static struct shrinker workingset_shadow_shrinker = {
@@ -523,7 +518,8 @@ static int __init workingset_init(void)
 	pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
 	       timestamp_bits, max_order, bucket_order);
 
-	ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
+	/* list_lru lock nests inside the IRQ-safe i_pages lock */
+	ret = __list_lru_init(&shadow_nodes, true, true, &shadow_nodes_key);
 	if (ret)
 		goto err;
 	ret = register_shrinker(&workingset_shadow_shrinker);