diff mbox series

[2/5] nfs: Add mount option for forcing RPC requests to one file over one connection

Message ID SG2P153MB0361FD1C5138A0C1FCD7DB719E649@SG2P153MB0361.APCP153.PROD.OUTLOOK.COM (mailing list archive)
State New
Headers show
Series nfs: Add mount option for forcing RPC requests for one file over one connection | expand

Commit Message

Nagendra Tomar March 23, 2021, 5:48 a.m. UTC
From: Nagendra S Tomar <natomar@microsoft.com>

Add a new mount option, ncpolicy=roundrobin|hash, which allows the user
to select the nconnect policy for the given mount. It defaults to
roundrobin. The user-selected policy is stored in the nfs_client
structure and passed down to the RPC client (struct rpc_clnt), where
transport selection is done accordingly.
Also add a new function pointer, p_fhhash, to struct rpc_procinfo.
It can be supplied to compute the target file's hash for a given RPC,
which is then used to bind all RPCs for that file to one xprt.

Signed-off-by: Nagendra S Tomar <natomar@microsoft.com>
---
 fs/nfs/client.c             |  3 +++
 fs/nfs/fs_context.c         | 26 ++++++++++++++++++++++++++
 fs/nfs/internal.h           |  2 ++
 fs/nfs/nfs3client.c         |  4 +++-
 fs/nfs/nfs4client.c         | 14 +++++++++++---
 fs/nfs/super.c              |  7 ++++++-
 include/linux/nfs_fs_sb.h   |  1 +
 include/linux/sunrpc/clnt.h | 15 +++++++++++++++
 net/sunrpc/clnt.c           | 34 ++++++++++++++++++++++++++++------
 9 files changed, 95 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ff5c4d0d6d13..5c2809d8368a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -179,6 +179,7 @@  struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 
 	clp->cl_proto = cl_init->proto;
 	clp->cl_nconnect = cl_init->nconnect;
+	clp->cl_ncpolicy = cl_init->ncpolicy;
 	clp->cl_net = get_net(cl_init->net);
 
 	clp->cl_principal = "*";
@@ -506,6 +507,7 @@  int nfs_create_rpc_client(struct nfs_client *clp,
 		.net		= clp->cl_net,
 		.protocol	= clp->cl_proto,
 		.nconnect	= clp->cl_nconnect,
+		.ncpolicy	= clp->cl_ncpolicy,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
 		.timeout	= cl_init->timeparms,
@@ -678,6 +680,7 @@  static int nfs_init_server(struct nfs_server *server,
 		.timeparms = &timeparms,
 		.cred = server->cred,
 		.nconnect = ctx->nfs_server.nconnect,
+		.ncpolicy = ctx->nfs_server.ncpolicy,
 		.init_flags = (1UL << NFS_CS_REUSEPORT),
 	};
 	struct nfs_client *clp;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 971a9251c1d9..7bb8f1c8356f 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -60,6 +60,7 @@  enum nfs_param {
 	Opt_mountvers,
 	Opt_namelen,
 	Opt_nconnect,
+	Opt_ncpolicy,
 	Opt_port,
 	Opt_posix,
 	Opt_proto,
@@ -127,6 +128,18 @@  static const struct constant_table nfs_param_enums_write[] = {
 	{}
 };
 
+enum {
+	Opt_ncpolicy_roundrobin,
+	Opt_ncpolicy_hash,
+};
+
+static const struct constant_table nfs_param_enums_ncpolicy[] = {
+	{ "hash",		Opt_ncpolicy_hash },
+	{ "roundrobin",		Opt_ncpolicy_roundrobin },
+	{ "rr",			Opt_ncpolicy_roundrobin },
+	{}
+};
+
 static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_flag_no("ac",		Opt_ac),
 	fsparam_u32   ("acdirmax",	Opt_acdirmax),
@@ -158,6 +171,7 @@  static const struct fs_parameter_spec nfs_fs_parameters[] = {
 	fsparam_u32   ("mountvers",	Opt_mountvers),
 	fsparam_u32   ("namlen",	Opt_namelen),
 	fsparam_u32   ("nconnect",	Opt_nconnect),
+	fsparam_enum  ("ncpolicy",	Opt_ncpolicy, nfs_param_enums_ncpolicy),
 	fsparam_string("nfsvers",	Opt_vers),
 	fsparam_u32   ("port",		Opt_port),
 	fsparam_flag_no("posix",	Opt_posix),
@@ -749,6 +763,18 @@  static int nfs_fs_context_parse_param(struct fs_context *fc,
 			goto out_of_bounds;
 		ctx->nfs_server.nconnect = result.uint_32;
 		break;
+	case Opt_ncpolicy:
+		switch (result.uint_32) {
+		case Opt_ncpolicy_roundrobin:
+			ctx->nfs_server.ncpolicy = ncpolicy_roundrobin;
+			break;
+		case Opt_ncpolicy_hash:
+			ctx->nfs_server.ncpolicy = ncpolicy_hash;
+			break;
+		default:
+			goto out_invalid_value;
+		}
+		break;
 	case Opt_lookupcache:
 		switch (result.uint_32) {
 		case Opt_lookupcache_all:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b644d6c09e4..e6ca664d7e91 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -67,6 +67,7 @@  struct nfs_client_initdata {
 	int proto;
 	u32 minorversion;
 	unsigned int nconnect;
+	enum ncpolicy ncpolicy;
 	struct net *net;
 	const struct rpc_timeout *timeparms;
 	const struct cred *cred;
@@ -120,6 +121,7 @@  struct nfs_fs_context {
 		int			port;
 		unsigned short		protocol;
 		unsigned short		nconnect;
+		enum ncpolicy		ncpolicy;
 		unsigned short		export_path_len;
 	} nfs_server;
 
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 5601e47360c2..f8a648f7492a 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -102,8 +102,10 @@  struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 889a9f4c0310..c967c214129a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -863,6 +863,7 @@  static int nfs4_set_client(struct nfs_server *server,
 		const char *ip_addr,
 		int proto, const struct rpc_timeout *timeparms,
 		u32 minorversion, unsigned int nconnect,
+		enum ncpolicy ncpolicy,
 		struct net *net)
 {
 	struct nfs_client_initdata cl_init = {
@@ -881,8 +882,10 @@  static int nfs4_set_client(struct nfs_server *server,
 
 	if (minorversion == 0)
 		__set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags);
-	if (proto == XPRT_TRANSPORT_TCP)
+	if (proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = nconnect;
+		cl_init.ncpolicy = ncpolicy;
+	}
 
 	if (server->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -950,8 +953,10 @@  struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
 		return ERR_PTR(-EINVAL);
 	cl_init.hostname = buf;
 
-	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP)
+	if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) {
 		cl_init.nconnect = mds_clp->cl_nconnect;
+		cl_init.ncpolicy = mds_clp->cl_ncpolicy;
+	}
 
 	if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
 		__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -1120,6 +1125,7 @@  static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
 				&timeparms,
 				ctx->minorversion,
 				ctx->nfs_server.nconnect,
+				ctx->nfs_server.ncpolicy,
 				fc->net_ns);
 	if (error < 0)
 		return error;
@@ -1209,6 +1215,7 @@  struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_ncpolicy,
 				parent_client->cl_net);
 	if (!error)
 		goto init_server;
@@ -1224,6 +1231,7 @@  struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
 				parent_server->client->cl_timeout,
 				parent_client->cl_mvops->minor_version,
 				parent_client->cl_nconnect,
+				parent_client->cl_ncpolicy,
 				parent_client->cl_net);
 	if (error < 0)
 		goto error;
@@ -1321,7 +1329,7 @@  int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	error = nfs4_set_client(server, hostname, sap, salen, buf,
 				clp->cl_proto, clnt->cl_timeout,
 				clp->cl_minorversion,
-				clp->cl_nconnect, net);
+				clp->cl_nconnect, clp->cl_ncpolicy, net);
 	clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status);
 	if (error != 0) {
 		nfs_server_insert_lists(server);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 94885c6f8f54..8719be70051b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -481,8 +481,13 @@  static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	seq_printf(m, ",proto=%s",
 		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID));
 	rcu_read_unlock();
-	if (clp->cl_nconnect > 0)
+	if (clp->cl_nconnect > 0) {
 		seq_printf(m, ",nconnect=%u", clp->cl_nconnect);
+		if (clp->cl_ncpolicy == ncpolicy_roundrobin)
+			seq_puts(m, ",ncpolicy=roundrobin");
+		else if (clp->cl_ncpolicy == ncpolicy_hash)
+			seq_puts(m, ",ncpolicy=hash");
+	}
 	if (version == 4) {
 		if (nfss->port != NFS_PORT)
 			seq_printf(m, ",port=%u", nfss->port);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6f76b32a0238..737f4d231e23 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -62,6 +62,7 @@  struct nfs_client {
 
 	u32			cl_minorversion;/* NFSv4 minorversion */
 	unsigned int		cl_nconnect;	/* Number of connections */
+	enum ncpolicy		cl_ncpolicy;	/* nconnect policy */
 	const char *		cl_principal;  /* used for machine cred */
 
 #if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 02e7a5863d28..aa1c1706f4d5 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -28,6 +28,15 @@ 
 #include <net/ipv6.h>
 #include <linux/sunrpc/xprtmultipath.h>
 
+/*
+ * Policies for controlling distribution of RPC requests over multiple
+ * nconnect connections.
+ */
+enum ncpolicy {
+	ncpolicy_roundrobin,	// Select roundrobin.
+	ncpolicy_hash,		// Select based on target filehandle hash.
+};
+
 struct rpc_inode;
 
 /*
@@ -40,6 +49,7 @@  struct rpc_clnt {
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
+	enum ncpolicy		cl_ncpolicy;	/* nconnect policy */
 	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
 	u32			cl_prog,	/* RPC program number */
 				cl_vers,	/* RPC version number */
@@ -101,6 +111,8 @@  struct rpc_version {
 	unsigned int		*counts;	/* call counts */
 };
 
+typedef u32 (*getfhhash_t)(const void *obj);
+
 /*
  * Procedure information
  */
@@ -108,6 +120,7 @@  struct rpc_procinfo {
 	u32			p_proc;		/* RPC procedure number */
 	kxdreproc_t		p_encode;	/* XDR encode function */
 	kxdrdproc_t		p_decode;	/* XDR decode function */
+	getfhhash_t		p_fhhash;	/* Returns target fh hash */
 	unsigned int		p_arglen;	/* argument hdr length (u32) */
 	unsigned int		p_replen;	/* reply hdr length (u32) */
 	unsigned int		p_timer;	/* Which RTT timer to use */
@@ -129,6 +142,7 @@  struct rpc_create_args {
 	u32			version;
 	rpc_authflavor_t	authflavor;
 	u32			nconnect;
+	enum ncpolicy		ncpolicy;
 	unsigned long		flags;
 	char			*client_name;
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
@@ -247,4 +261,5 @@  static inline void rpc_task_close_connection(struct rpc_task *task)
 	if (task->tk_xprt)
 		xprt_force_disconnect(task->tk_xprt);
 }
+
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1b2a02460601..ed470a75e91d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -410,6 +410,7 @@  static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
 	}
 
 	rpc_clnt_set_transport(clnt, xprt, timeout);
+	clnt->cl_ncpolicy = args->ncpolicy;
 	xprt_iter_init(&clnt->cl_xpi, xps);
 	xprt_switch_put(xps);
 
@@ -640,6 +641,7 @@  static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
 	new->cl_discrtry = clnt->cl_discrtry;
 	new->cl_chatty = clnt->cl_chatty;
 	new->cl_principal = clnt->cl_principal;
+	new->cl_ncpolicy = clnt->cl_ncpolicy;
 	return new;
 
 out_err:
@@ -1053,9 +1055,10 @@  rpc_task_get_first_xprt(struct rpc_clnt *clnt)
 }
 
 static struct rpc_xprt *
-rpc_task_get_next_xprt(struct rpc_clnt *clnt)
+rpc_task_get_next_xprt(struct rpc_clnt *clnt, u32 hash)
 {
-	return rpc_task_get_xprt(clnt, xprt_iter_get_next(&clnt->cl_xpi, 0));
+	return rpc_task_get_xprt(clnt,
+			xprt_iter_get_next(&clnt->cl_xpi, hash));
 }
 
 static
@@ -1065,8 +1068,16 @@  void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
 		return;
 	if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN)
 		task->tk_xprt = rpc_task_get_first_xprt(clnt);
-	else
-		task->tk_xprt = rpc_task_get_next_xprt(clnt);
+	else {
+		u32 xprt_hint = 0;
+
+		if (clnt->cl_ncpolicy == ncpolicy_hash &&
+		    task->tk_msg.rpc_proc->p_fhhash) {
+			xprt_hint = task->tk_msg.rpc_proc->p_fhhash(
+						task->tk_msg.rpc_argp);
+		}
+		task->tk_xprt = rpc_task_get_next_xprt(clnt, xprt_hint);
+	}
 }
 
 static
@@ -1130,8 +1141,8 @@  struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	if (!RPC_IS_ASYNC(task))
 		task->tk_flags |= RPC_TASK_CRED_NOREF;
 
-	rpc_task_set_client(task, task_setup_data->rpc_client);
 	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
+	rpc_task_set_client(task, task_setup_data->rpc_client);
 
 	if (task->tk_action == NULL)
 		rpc_call_start(task);
@@ -1636,6 +1647,7 @@  call_start(struct rpc_task *task)
 	/* Increment call count (version might not be valid for ping) */
 	if (clnt->cl_program->version[clnt->cl_vers])
 		clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
+
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 	rpc_task_set_transport(task, clnt);
@@ -2888,7 +2900,17 @@  int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
 				connect_timeout,
 				reconnect_timeout);
 
-	rpc_xprt_switch_set_roundrobin(xps);
+	switch (clnt->cl_ncpolicy) {
+	case ncpolicy_roundrobin:
+	default:
+		WARN_ON(clnt->cl_ncpolicy != ncpolicy_roundrobin);
+		rpc_xprt_switch_set_roundrobin(xps);
+		break;
+	case ncpolicy_hash:
+		rpc_xprt_switch_set_hash(xps);
+		break;
+	}
+
 	if (setup) {
 		ret = setup(clnt, xps, xprt, data);
 		if (ret != 0)