diff mbox series

[11/14] nfsd: don't use sv_nrthreads in connection limiting calculations.

Message ID 20240715074657.18174-12-neilb@suse.de (mailing list archive)
State New
Headers show
Series support automatic changes to nfsd thread count | expand

Commit Message

NeilBrown July 15, 2024, 7:14 a.m. UTC
The heuristic for limiting the number of incoming connections to nfsd
currently uses sv_nrthreads - allowing more connections if more threads
were configured.

A future patch will allow the number of threads to grow dynamically so
that there is no need to configure sv_nrthreads.  So we need a different
solution for limiting connections.

It isn't clear what problem is solved by limiting connections (as
mentioned in a code comment) but the most likely problem is a connection
storm - many connections that are not doing productive work.  These will
be closed after about 6 minutes already but it might help to slow down a
storm.

This patch adds a per-connection flag XPT_PEER_VALID which indicates
that the peer has presented a filehandle for which it has some sort of
access, i.e. the peer is known to be trusted in some way.  We now only
count connections which have NOT been determined to be valid.  There
should be relatively few of these at any given time.

If the number of non-validated peers exceeds a limit - currently 64 - we
close the oldest non-validated peer to avoid having too many of these
useless connections.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/netns.h                 |  4 ++--
 fs/nfsd/nfsfh.c                 |  8 ++++++++
 include/linux/sunrpc/svc.h      |  2 +-
 include/linux/sunrpc/svc_xprt.h |  4 ++++
 net/sunrpc/svc_xprt.c           | 33 +++++++++++++++++----------------
 5 files changed, 32 insertions(+), 19 deletions(-)

Comments

Jeff Layton July 15, 2024, 3:52 p.m. UTC | #1
On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> The heuristic for limiting the number of incoming connections to nfsd
> currently uses sv_nrthreads - allowing more connections if more threads
> were configured.
> 
> A future patch will allow number of threads to grow dynamically so that
> there is no need to configure sv_nrthreads.  So we need a different
> solution for limiting connections.
> 
> It isn't clear what problem is solved by limiting connections (as
> mentioned in a code comment) but the most likely problem is a connection
> storm - many connections that are not doing productive work.  These will
> be closed after about 6 minutes already but it might help to slow down a
> storm.
> 
> This patch add a per-connection flag XPT_PEER_VALID which indicates
> that the peer has presented a filehandle for which it has some sort of
> access.  i.e the peer is known to be trusted in some way.  We now only
> count connections which have NOT be determined to be valid.  There
> should be relative few of these at any given time.
> 
> If the number of non-validated peer exceed as limit - currently 64 - we
> close the oldest non-validated peer to avoid having too many of these
> useless connections.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/netns.h                 |  4 ++--
>  fs/nfsd/nfsfh.c                 |  8 ++++++++
>  include/linux/sunrpc/svc.h      |  2 +-
>  include/linux/sunrpc/svc_xprt.h |  4 ++++
>  net/sunrpc/svc_xprt.c           | 33 +++++++++++++++++----------------
>  5 files changed, 32 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 238fc4e56e53..0d2ac15a5003 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -128,8 +128,8 @@ struct nfsd_net {
>  	unsigned char writeverf[8];
>  
>  	/*
> -	 * Max number of connections this nfsd container will allow. Defaults
> -	 * to '0' which is means that it bases this on the number of threads.
> +	 * Max number of non-validated connections this nfsd container
> +	 * will allow.  Defaults to '0' gets mapped to 64.
>  	 */
>  	unsigned int max_connections;
>  
> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> index 0b75305fb5f5..08742bf8de02 100644
> --- a/fs/nfsd/nfsfh.c
> +++ b/fs/nfsd/nfsfh.c
> @@ -391,6 +391,14 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
>  		goto out;
>  
>  skip_pseudoflavor_check:
> +	if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags) &&
> +	    !test_and_set_bit(XPT_PEER_VALID, &rqstp->rq_xprt->xpt_flags)) {
> +		struct svc_serv *serv = rqstp->rq_server;
> +		spin_lock(&serv->sv_lock);
> +		serv->sv_tmpcnt -= 1;
> +		spin_unlock(&serv->sv_lock);
> +	}
> +

This is the only place you set XPT_PEER_VALID, but this change affects
more services than just nfsd. What about lockd? Do we need a similar
change there?

>  	/* Finally, check access permissions. */
>  	error = nfsd_permission(rqstp, exp, dentry, access);
>  out:
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 99e9345d829e..0b414af448e0 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -79,7 +79,7 @@ struct svc_serv {
>  	unsigned int		sv_xdrsize;	/* XDR buffer size */
>  	struct list_head	sv_permsocks;	/* all permanent sockets */
>  	struct list_head	sv_tempsocks;	/* all temporary sockets */
> -	int			sv_tmpcnt;	/* count of temporary sockets */
> +	int			sv_tmpcnt;	/* count of temporary "valid" sockets */
>  	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */
>  
>  	char *			sv_name;	/* service name */
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index 0981e35a9fed..92565133b3b6 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -99,6 +99,10 @@ enum {
>  	XPT_HANDSHAKE,		/* xprt requests a handshake */
>  	XPT_TLS_SESSION,	/* transport-layer security established */
>  	XPT_PEER_AUTH,		/* peer has been authenticated */
> +	XPT_PEER_VALID,		/* peer has presented a filehandle that
> +				 * it has access to.  It is NOT counted
> +				 * in ->sv_tmpcnt.
> +				 */
>  };
>  
>  static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 53ebc719ff5a..a9215e1a2f38 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -606,7 +606,8 @@ int svc_port_is_privileged(struct sockaddr *sin)
>  }
>  
>  /*
> - * Make sure that we don't have too many active connections. If we have,
> + * Make sure that we don't have too many connections that have not yet
> + * demonstrated that they have access the the NFS server. If we have,
>   * something must be dropped. It's not clear what will happen if we allow
>   * "too many" connections, but when dealing with network-facing software,
>   * we have to code defensively. Here we do that by imposing hard limits.
> @@ -625,27 +626,26 @@ int svc_port_is_privileged(struct sockaddr *sin)
>   */
>  static void svc_check_conn_limits(struct svc_serv *serv)
>  {
> -	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
> -				(serv->sv_nrthreads+3) * 20;
> +	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : 64;
>  
>  	if (serv->sv_tmpcnt > limit) {
> -		struct svc_xprt *xprt = NULL;
> +		struct svc_xprt *xprt = NULL, *xprti;
>  		spin_lock_bh(&serv->sv_lock);
>  		if (!list_empty(&serv->sv_tempsocks)) {
> -			/* Try to help the admin */
> -			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
> -					       serv->sv_name, serv->sv_maxconn ?
> -					       "max number of connections" :
> -					       "number of threads");
>  			/*
>  			 * Always select the oldest connection. It's not fair,
> -			 * but so is life
> +			 * but nor is life.
>  			 */
> -			xprt = list_entry(serv->sv_tempsocks.prev,
> -					  struct svc_xprt,
> -					  xpt_list);
> -			set_bit(XPT_CLOSE, &xprt->xpt_flags);
> -			svc_xprt_get(xprt);
> +			list_for_each_entry_reverse(xprti, &serv->sv_tempsocks,
> +						    xpt_list)
> +			{
> +				if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) {
> +					xprt = xprti;
> +					set_bit(XPT_CLOSE, &xprt->xpt_flags);
> +					svc_xprt_get(xprt);
> +					break;
> +				}
> +			}
>  		}
>  		spin_unlock_bh(&serv->sv_lock);
>  
> @@ -1039,7 +1039,8 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
>  
>  	spin_lock_bh(&serv->sv_lock);
>  	list_del_init(&xprt->xpt_list);
> -	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
> +	if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
> +	    !test_bit(XPT_PEER_VALID, &xprt->xpt_flags))
>  		serv->sv_tmpcnt--;
>  	spin_unlock_bh(&serv->sv_lock);
>
NeilBrown July 16, 2024, 2:04 a.m. UTC | #2
On Tue, 16 Jul 2024, Jeff Layton wrote:
> On Mon, 2024-07-15 at 17:14 +1000, NeilBrown wrote:
> > The heuristic for limiting the number of incoming connections to nfsd
> > currently uses sv_nrthreads - allowing more connections if more threads
> > were configured.
> > 
> > A future patch will allow number of threads to grow dynamically so that
> > there is no need to configure sv_nrthreads.  So we need a different
> > solution for limiting connections.
> > 
> > It isn't clear what problem is solved by limiting connections (as
> > mentioned in a code comment) but the most likely problem is a connection
> > storm - many connections that are not doing productive work.  These will
> > be closed after about 6 minutes already but it might help to slow down a
> > storm.
> > 
> > This patch add a per-connection flag XPT_PEER_VALID which indicates
> > that the peer has presented a filehandle for which it has some sort of
> > access.  i.e the peer is known to be trusted in some way.  We now only
> > count connections which have NOT be determined to be valid.  There
> > should be relative few of these at any given time.
> > 
> > If the number of non-validated peer exceed as limit - currently 64 - we
> > close the oldest non-validated peer to avoid having too many of these
> > useless connections.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/netns.h                 |  4 ++--
> >  fs/nfsd/nfsfh.c                 |  8 ++++++++
> >  include/linux/sunrpc/svc.h      |  2 +-
> >  include/linux/sunrpc/svc_xprt.h |  4 ++++
> >  net/sunrpc/svc_xprt.c           | 33 +++++++++++++++++----------------
> >  5 files changed, 32 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 238fc4e56e53..0d2ac15a5003 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -128,8 +128,8 @@ struct nfsd_net {
> >  	unsigned char writeverf[8];
> >  
> >  	/*
> > -	 * Max number of connections this nfsd container will allow. Defaults
> > -	 * to '0' which is means that it bases this on the number of threads.
> > +	 * Max number of non-validated connections this nfsd container
> > +	 * will allow.  Defaults to '0' gets mapped to 64.
> >  	 */
> >  	unsigned int max_connections;
> >  
> > diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
> > index 0b75305fb5f5..08742bf8de02 100644
> > --- a/fs/nfsd/nfsfh.c
> > +++ b/fs/nfsd/nfsfh.c
> > @@ -391,6 +391,14 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
> >  		goto out;
> >  
> >  skip_pseudoflavor_check:
> > +	if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags) &&
> > +	    !test_and_set_bit(XPT_PEER_VALID, &rqstp->rq_xprt->xpt_flags)) {
> > +		struct svc_serv *serv = rqstp->rq_server;
> > +		spin_lock(&serv->sv_lock);
> > +		serv->sv_tmpcnt -= 1;
> > +		spin_unlock(&serv->sv_lock);
> > +	}
> > +
> 
> This is the only place you set XPT_PEER_VALID, but this change affects
> more services than just nfsd. What about lockd? Do we need a similar
> change there?

Lockd calls nlmsvc_ops->fopen which is nlm_fopen() which calls
nfsd_open() which calls fh_verify().  So lockd is safe.

The nfs callback handler might need help, but it sets ->sv_maxconn=1024,
so I think it is safe for now.
(lockd defaults nlm_max_connections to 1024, so it is also safe without
calling fh_verify.  Maybe I should clean up)

Thanks,
NeilBrown
diff mbox series

Patch

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 238fc4e56e53..0d2ac15a5003 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -128,8 +128,8 @@  struct nfsd_net {
 	unsigned char writeverf[8];
 
 	/*
-	 * Max number of connections this nfsd container will allow. Defaults
-	 * to '0' which is means that it bases this on the number of threads.
+	 * Max number of non-validated connections this nfsd container
+	 * will allow.  Defaults to '0', which gets mapped to 64.
 	 */
 	unsigned int max_connections;
 
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0b75305fb5f5..08742bf8de02 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -391,6 +391,14 @@  fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
 		goto out;
 
 skip_pseudoflavor_check:
+	if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags) &&
+	    !test_and_set_bit(XPT_PEER_VALID, &rqstp->rq_xprt->xpt_flags)) {
+		struct svc_serv *serv = rqstp->rq_server;
+		spin_lock(&serv->sv_lock);
+		serv->sv_tmpcnt -= 1;
+		spin_unlock(&serv->sv_lock);
+	}
+
 	/* Finally, check access permissions. */
 	error = nfsd_permission(rqstp, exp, dentry, access);
 out:
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 99e9345d829e..0b414af448e0 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -79,7 +79,7 @@  struct svc_serv {
 	unsigned int		sv_xdrsize;	/* XDR buffer size */
 	struct list_head	sv_permsocks;	/* all permanent sockets */
 	struct list_head	sv_tempsocks;	/* all temporary sockets */
-	int			sv_tmpcnt;	/* count of temporary sockets */
+	int			sv_tmpcnt;	/* count of temporary sockets not yet validated */
 	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */
 
 	char *			sv_name;	/* service name */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 0981e35a9fed..92565133b3b6 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -99,6 +99,10 @@  enum {
 	XPT_HANDSHAKE,		/* xprt requests a handshake */
 	XPT_TLS_SESSION,	/* transport-layer security established */
 	XPT_PEER_AUTH,		/* peer has been authenticated */
+	XPT_PEER_VALID,		/* peer has presented a filehandle that
+				 * it has access to.  It is NOT counted
+				 * in ->sv_tmpcnt.
+				 */
 };
 
 static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 53ebc719ff5a..a9215e1a2f38 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -606,7 +606,8 @@  int svc_port_is_privileged(struct sockaddr *sin)
 }
 
 /*
- * Make sure that we don't have too many active connections. If we have,
+ * Make sure that we don't have too many connections that have not yet
+ * demonstrated that they have access to the NFS server. If we have,
  * something must be dropped. It's not clear what will happen if we allow
  * "too many" connections, but when dealing with network-facing software,
  * we have to code defensively. Here we do that by imposing hard limits.
@@ -625,27 +626,26 @@  int svc_port_is_privileged(struct sockaddr *sin)
  */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
-				(serv->sv_nrthreads+3) * 20;
+	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn : 64;
 
 	if (serv->sv_tmpcnt > limit) {
-		struct svc_xprt *xprt = NULL;
+		struct svc_xprt *xprt = NULL, *xprti;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
-			/* Try to help the admin */
-			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
-					       serv->sv_name, serv->sv_maxconn ?
-					       "max number of connections" :
-					       "number of threads");
 			/*
 			 * Always select the oldest connection. It's not fair,
-			 * but so is life
+			 * but nor is life.
 			 */
-			xprt = list_entry(serv->sv_tempsocks.prev,
-					  struct svc_xprt,
-					  xpt_list);
-			set_bit(XPT_CLOSE, &xprt->xpt_flags);
-			svc_xprt_get(xprt);
+			list_for_each_entry_reverse(xprti, &serv->sv_tempsocks,
+						    xpt_list)
+			{
+				if (!test_bit(XPT_PEER_VALID, &xprti->xpt_flags)) {
+					xprt = xprti;
+					set_bit(XPT_CLOSE, &xprt->xpt_flags);
+					svc_xprt_get(xprt);
+					break;
+				}
+			}
 		}
 		spin_unlock_bh(&serv->sv_lock);
 
@@ -1039,7 +1039,8 @@  static void svc_delete_xprt(struct svc_xprt *xprt)
 
 	spin_lock_bh(&serv->sv_lock);
 	list_del_init(&xprt->xpt_list);
-	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+	if (test_bit(XPT_TEMP, &xprt->xpt_flags) &&
+	    !test_bit(XPT_PEER_VALID, &xprt->xpt_flags))
 		serv->sv_tmpcnt--;
 	spin_unlock_bh(&serv->sv_lock);