SUNRPC/cache: Allow garbage collection of invalid cache entries
diff mbox series

Message ID 20200114165738.922961-1-trond.myklebust@hammerspace.com
State New
Headers show
Series
  • SUNRPC/cache: Allow garbage collection of invalid cache entries
Related show

Commit Message

Trond Myklebust Jan. 14, 2020, 4:57 p.m. UTC
If the cache entry never gets initialised, we want the garbage
collector to be able to evict it. Otherwise if the upcall daemon
fails to initialise the entry, we end up never expiring it.

Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache as unexpired")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/cache.h |  3 ---
 net/sunrpc/cache.c           | 36 +++++++++++++++++++-----------------
 2 files changed, 19 insertions(+), 20 deletions(-)

Comments

J. Bruce Fields Feb. 6, 2020, 4:33 p.m. UTC | #1
On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote:
> If the cache entry never gets initialised, we want the garbage
> collector to be able to evict it. Otherwise if the upcall daemon
> fails to initialise the entry, we end up never expiring it.

Could you tell us more about what motivated this?

It's causing failures on pynfs server-reboot tests.  I haven't pinned
down the cause yet, but it looks like it could be a regression to the
behavior Kinglong Mee describes in detail in his original patch.

Dropping for the time being.

--b.

> 
> Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache as unexpired")
> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> ---
>  include/linux/sunrpc/cache.h |  3 ---
>  net/sunrpc/cache.c           | 36 +++++++++++++++++++-----------------
>  2 files changed, 19 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
> index f8603724fbee..0428dd23fd79 100644
> --- a/include/linux/sunrpc/cache.h
> +++ b/include/linux/sunrpc/cache.h
> @@ -202,9 +202,6 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
>  
>  static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
>  {
> -	if (!test_bit(CACHE_VALID, &h->flags))
> -		return false;
> -
>  	return  (h->expiry_time < seconds_since_boot()) ||
>  		(detail->flush_time >= h->last_refresh);
>  }
> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> index 52d927210d32..99d630150af6 100644
> --- a/net/sunrpc/cache.c
> +++ b/net/sunrpc/cache.c
> @@ -65,13 +65,14 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
>  
>  	rcu_read_lock();
>  	hlist_for_each_entry_rcu(tmp, head, cache_list) {
> -		if (detail->match(tmp, key)) {
> -			if (cache_is_expired(detail, tmp))
> -				continue;
> -			tmp = cache_get_rcu(tmp);
> -			rcu_read_unlock();
> -			return tmp;
> -		}
> +		if (!detail->match(tmp, key))
> +			continue;
> +		if (test_bit(CACHE_VALID, &tmp->flags) &&
> +		    cache_is_expired(detail, tmp))
> +			continue;
> +		tmp = cache_get_rcu(tmp);
> +		rcu_read_unlock();
> +		return tmp;
>  	}
>  	rcu_read_unlock();
>  	return NULL;
> @@ -114,17 +115,18 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
>  
>  	/* check if entry appeared while we slept */
>  	hlist_for_each_entry_rcu(tmp, head, cache_list) {
> -		if (detail->match(tmp, key)) {
> -			if (cache_is_expired(detail, tmp)) {
> -				sunrpc_begin_cache_remove_entry(tmp, detail);
> -				freeme = tmp;
> -				break;
> -			}
> -			cache_get(tmp);
> -			spin_unlock(&detail->hash_lock);
> -			cache_put(new, detail);
> -			return tmp;
> +		if (!detail->match(tmp, key))
> +			continue;
> +		if (test_bit(CACHE_VALID, &tmp->flags) &&
> +		    cache_is_expired(detail, tmp)) {
> +			sunrpc_begin_cache_remove_entry(tmp, detail);
> +			freeme = tmp;
> +			break;
>  		}
> +		cache_get(tmp);
> +		spin_unlock(&detail->hash_lock);
> +		cache_put(new, detail);
> +		return tmp;
>  	}
>  
>  	hlist_add_head_rcu(&new->cache_list, head);
> -- 
> 2.24.1
Trond Myklebust Feb. 7, 2020, 2:25 p.m. UTC | #2
On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote:
> On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote:
> > If the cache entry never gets initialised, we want the garbage
> > collector to be able to evict it. Otherwise if the upcall daemon
> > fails to initialise the entry, we end up never expiring it.
> 
> Could you tell us more about what motivated this?
> 
> It's causing failures on pynfs server-reboot tests.  I haven't pinned
> down the cause yet, but it looks like it could be a regression to the
> behavior Kinglong Mee describes in detail in his original patch.
> 

Can you point me to the tests that are failing?

The motivation here is to allow the garbage collector to do its job of
evicting cache entries after they are supposed to have timed out. The
fact that uninitialised cache entries are given an infinite lifetime,
and are never evicted is a de facto memory leak if, for instance, the
mountd daemon ignores the cache request, or the downcall in
expkey_parse() or svc_export_parse() fails without being able to update
the request.

The threads that are waiting for the cache replies already have a
mechanism for dealing with timeouts (with cache_wait_req() and deferred
requests), so the question is what is so special about uninitialised
requests that we have to leak them in order to avoid a problem with
reboot?

> Dropping for the time being.
> 
> --b.
> 
> > Fixes: d6fc8821c2d2 ("SUNRPC/Cache: Always treat the invalid cache
> > as unexpired")
> > Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
> > ---
> >  include/linux/sunrpc/cache.h |  3 ---
> >  net/sunrpc/cache.c           | 36 +++++++++++++++++++-------------
> > ----
> >  2 files changed, 19 insertions(+), 20 deletions(-)
> > 
> > diff --git a/include/linux/sunrpc/cache.h
> > b/include/linux/sunrpc/cache.h
> > index f8603724fbee..0428dd23fd79 100644
> > --- a/include/linux/sunrpc/cache.h
> > +++ b/include/linux/sunrpc/cache.h
> > @@ -202,9 +202,6 @@ static inline void cache_put(struct cache_head
> > *h, struct cache_detail *cd)
> >  
> >  static inline bool cache_is_expired(struct cache_detail *detail,
> > struct cache_head *h)
> >  {
> > -	if (!test_bit(CACHE_VALID, &h->flags))
> > -		return false;
> > -
> >  	return  (h->expiry_time < seconds_since_boot()) ||
> >  		(detail->flush_time >= h->last_refresh);
> >  }
> > diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> > index 52d927210d32..99d630150af6 100644
> > --- a/net/sunrpc/cache.c
> > +++ b/net/sunrpc/cache.c
> > @@ -65,13 +65,14 @@ static struct cache_head
> > *sunrpc_cache_find_rcu(struct cache_detail *detail,
> >  
> >  	rcu_read_lock();
> >  	hlist_for_each_entry_rcu(tmp, head, cache_list) {
> > -		if (detail->match(tmp, key)) {
> > -			if (cache_is_expired(detail, tmp))
> > -				continue;
> > -			tmp = cache_get_rcu(tmp);
> > -			rcu_read_unlock();
> > -			return tmp;
> > -		}
> > +		if (!detail->match(tmp, key))
> > +			continue;
> > +		if (test_bit(CACHE_VALID, &tmp->flags) &&
> > +		    cache_is_expired(detail, tmp))
> > +			continue;
> > +		tmp = cache_get_rcu(tmp);
> > +		rcu_read_unlock();
> > +		return tmp;
> >  	}
> >  	rcu_read_unlock();
> >  	return NULL;
> > @@ -114,17 +115,18 @@ static struct cache_head
> > *sunrpc_cache_add_entry(struct cache_detail *detail,
> >  
> >  	/* check if entry appeared while we slept */
> >  	hlist_for_each_entry_rcu(tmp, head, cache_list) {
> > -		if (detail->match(tmp, key)) {
> > -			if (cache_is_expired(detail, tmp)) {
> > -				sunrpc_begin_cache_remove_entry(tmp,
> > detail);
> > -				freeme = tmp;
> > -				break;
> > -			}
> > -			cache_get(tmp);
> > -			spin_unlock(&detail->hash_lock);
> > -			cache_put(new, detail);
> > -			return tmp;
> > +		if (!detail->match(tmp, key))
> > +			continue;
> > +		if (test_bit(CACHE_VALID, &tmp->flags) &&
> > +		    cache_is_expired(detail, tmp)) {
> > +			sunrpc_begin_cache_remove_entry(tmp, detail);
> > +			freeme = tmp;
> > +			break;
> >  		}
> > +		cache_get(tmp);
> > +		spin_unlock(&detail->hash_lock);
> > +		cache_put(new, detail);
> > +		return tmp;
> >  	}
> >  
> >  	hlist_add_head_rcu(&new->cache_list, head);
> > -- 
> > 2.24.1
J. Bruce Fields Feb. 7, 2020, 6:18 p.m. UTC | #3
On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote:
> On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote:
> > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote:
> > > If the cache entry never gets initialised, we want the garbage
> > > collector to be able to evict it. Otherwise if the upcall daemon
> > > fails to initialise the entry, we end up never expiring it.
> > 
> > Could you tell us more about what motivated this?
> > 
> > It's causing failures on pynfs server-reboot tests.  I haven't pinned
> > down the cause yet, but it looks like it could be a regression to the
> > behavior Kinglong Mee describes in detail in his original patch.
> > 
> 
> Can you point me to the tests that are failing?

I'm basically doing

	./nfs4.1/testserver.py myserver:/path reboot
			--serverhelper=examples/server_helper.sh
			--serverhelperarg=myserver

For all I know at this point, the change could be exposing a pynfs-side
bug.

> The motivation here is to allow the garbage collector to do its job of
> evicting cache entries after they are supposed to have timed out.

Understood.  I was curious whether this was found by code inspection or
because you'd run across a case where the leak was causing a practical
problem.

--b.

> The fact that uninitialised cache entries are given an infinite
> lifetime, and are never evicted is a de facto memory leak if, for
> instance, the mountd daemon ignores the cache request, or the downcall
> in expkey_parse() or svc_export_parse() fails without being able to
> update the request.
> 
> The threads that are waiting for the cache replies already have a
> mechanism for dealing with timeouts (with cache_wait_req() and
> deferred requests), so the question is what is so special about
> uninitialised requests that we have to leak them in order to avoid a
> problem with reboot?
Trond Myklebust Feb. 10, 2020, 6:47 p.m. UTC | #4
On Fri, 2020-02-07 at 13:18 -0500, bfields@fieldses.org wrote:
> On Fri, Feb 07, 2020 at 02:25:27PM +0000, Trond Myklebust wrote:
> > On Thu, 2020-02-06 at 11:33 -0500, J. Bruce Fields wrote:
> > > On Tue, Jan 14, 2020 at 11:57:38AM -0500, Trond Myklebust wrote:
> > > > If the cache entry never gets initialised, we want the garbage
> > > > collector to be able to evict it. Otherwise if the upcall
> > > > daemon
> > > > fails to initialise the entry, we end up never expiring it.
> > > 
> > > Could you tell us more about what motivated this?
> > > 
> > > It's causing failures on pynfs server-reboot tests.  I haven't
> > > pinned
> > > down the cause yet, but it looks like it could be a regression to
> > > the
> > > behavior Kinglong Mee describes in detail in his original patch.
> > > 
> > 
> > Can you point me to the tests that are failing?
> 
> I'm basically doing
> 
> 	./nfs4.1/testserver.py myserver:/path reboot
> 			--serverhelper=examples/server_helper.sh
> 			--serverhelperarg=myserver
> 
> For all I know at this point, the change could be exposing a pynfs-
> side
> bug.
> 
> > The motivation here is to allow the garbage collector to do its job
> > of
> > evicting cache entries after they are supposed to have timed out.
> 
> Understood.  I was curious whether this was found by code inspection
> or
> because you'd run across a case where the leak was causing a
> practical
> problem.

I was seeing hangs in some cases where the path lookup was temporarily
failing in the export path downcall. In that case, the mount daemon
just ignores the returned error, and leaves it up to the kernel to fix
what is happening.

The problem is that since the garbage collector is currently told to
ignore stuff if CACHE_VALID is not set, and since cache_check() won't
do a second upcall, then any RPC call trying to access data on the path
that was affected just hangs and eventually times out. Nothing prompts
for this condition to be fixed.

> 
> --b.
> 
> > The fact that uninitialised cache entries are given an infinite
> > lifetime, and are never evicted is a de facto memory leak if, for
> > instance, the mountd daemon ignores the cache request, or the
> > downcall
> > in expkey_parse() or svc_export_parse() fails without being able to
> > update the request.
> > 
> > The threads that are waiting for the cache replies already have a
> > mechanism for dealing with timeouts (with cache_wait_req() and
> > deferred requests), so the question is what is so special about
> > uninitialised requests that we have to leak them in order to avoid
> > a
> > problem with reboot?
Trond Myklebust
CTO, Hammerspace Inc
4300 El Camino Real, Suite 105
Los Altos, CA 94022
www.hammer.space

Patch
diff mbox series

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f8603724fbee..0428dd23fd79 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -202,9 +202,6 @@  static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 
 static inline bool cache_is_expired(struct cache_detail *detail, struct cache_head *h)
 {
-	if (!test_bit(CACHE_VALID, &h->flags))
-		return false;
-
 	return  (h->expiry_time < seconds_since_boot()) ||
 		(detail->flush_time >= h->last_refresh);
 }
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 52d927210d32..99d630150af6 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -65,13 +65,14 @@  static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
 
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(tmp, head, cache_list) {
-		if (detail->match(tmp, key)) {
-			if (cache_is_expired(detail, tmp))
-				continue;
-			tmp = cache_get_rcu(tmp);
-			rcu_read_unlock();
-			return tmp;
-		}
+		if (!detail->match(tmp, key))
+			continue;
+		if (test_bit(CACHE_VALID, &tmp->flags) &&
+		    cache_is_expired(detail, tmp))
+			continue;
+		tmp = cache_get_rcu(tmp);
+		rcu_read_unlock();
+		return tmp;
 	}
 	rcu_read_unlock();
 	return NULL;
@@ -114,17 +115,18 @@  static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 
 	/* check if entry appeared while we slept */
 	hlist_for_each_entry_rcu(tmp, head, cache_list) {
-		if (detail->match(tmp, key)) {
-			if (cache_is_expired(detail, tmp)) {
-				sunrpc_begin_cache_remove_entry(tmp, detail);
-				freeme = tmp;
-				break;
-			}
-			cache_get(tmp);
-			spin_unlock(&detail->hash_lock);
-			cache_put(new, detail);
-			return tmp;
+		if (!detail->match(tmp, key))
+			continue;
+		if (test_bit(CACHE_VALID, &tmp->flags) &&
+		    cache_is_expired(detail, tmp)) {
+			sunrpc_begin_cache_remove_entry(tmp, detail);
+			freeme = tmp;
+			break;
 		}
+		cache_get(tmp);
+		spin_unlock(&detail->hash_lock);
+		cache_put(new, detail);
+		return tmp;
 	}
 
 	hlist_add_head_rcu(&new->cache_list, head);