diff mbox

pnfs support servers with multiple layout types

Message ID 1456927079-4788-1-git-send-email-tigran.mkrtchyan@desy.de (mailing list archive)
State New, archived
Headers show

Commit Message

Mkrtchyan, Tigran March 2, 2016, 1:57 p.m. UTC
The current NFSv4.1/pNFS client assumes that the MDS supports
only one layout type. While this is true for most existing servers,
it may change in the near future.

This patch is an attempt to add support for an MDS that offers multiple
layout types. To make it possible for such servers to function with
existing clients, the server must always send the default layout type
first in the list. The client starts processing layout types from the
second element and falls back to the first one if none of the
remaining types is supported.

Testing done:

  - started a server with nfs4_file and flex_file layouts
  - new kernel picked the flex_file layout
  - old kernel complained about multiple layout types and proceeded with the nfs4_file layout

Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
---
 fs/nfs/client.c         |  2 +-
 fs/nfs/nfs4xdr.c        | 23 ++++++++++-------------
 fs/nfs/pnfs.c           | 45 ++++++++++++++++++++++++++++++++-------------
 fs/nfs/pnfs.h           |  4 ++--
 include/linux/nfs_xdr.h |  8 +++++++-
 5 files changed, 52 insertions(+), 30 deletions(-)

Comments

Christoph Hellwig March 2, 2016, 4:37 p.m. UTC | #1
> +	for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
> +		layouttype[i] = be32_to_cpup(p++);

Can we use a bitmap and do something like

	layouttype |= (1 << be32_to_cpup(p++));

that's what I did for my unsubmitted patches to submit multiple
layouttypes in the Linux server..
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mkrtchyan, Tigran March 2, 2016, 7:22 p.m. UTC | #2
----- Original Message -----
> From: "Christoph Hellwig" <hch@infradead.org>
> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> Cc: "Trond Myklebust" <trond.myklebust@primarydata.com>, linux-nfs@vger.kernel.org
> Sent: Wednesday, March 2, 2016 5:37:46 PM
> Subject: Re: [PATCH] pnfs  support servers with multiple layout types

>> +	for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
>> +		layouttype[i] = be32_to_cpup(p++);
> 
> Can we use a bitmap and do something like
> 
>	layouttype |= (1 << be32_to_cpup(p++));
> 

Sure, if we describe in spec that layout type must be power of two
and max value is 32.

Tigran.

> that's what I did for my unsubmitted patches to submit multiple
> layouttypes in the Linux server..
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mkrtchyan, Tigran March 3, 2016, 1:50 p.m. UTC | #3
----- Original Message -----
> From: "Christoph Hellwig" <hch@infradead.org>
> To: "Tigran Mkrtchyan" <tigran.mkrtchyan@desy.de>
> Cc: "Trond Myklebust" <trond.myklebust@primarydata.com>, linux-nfs@vger.kernel.org
> Sent: Wednesday, March 2, 2016 5:37:46 PM
> Subject: Re: [PATCH] pnfs  support servers with multiple layout types

>> +	for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
>> +		layouttype[i] = be32_to_cpup(p++);
> 
> Can we use a bitmap and do something like
> 
>	layouttype |= (1 << be32_to_cpup(p++));
> 

Sorry, it took me a bit longer to understand what you were actually saying.
But it still won't work, as we will lose the order of supported layouts.

To make it work, we can add a mount option for the preferred layout:

mount -o preferred_layout=nfs4_file,vers=4.1 ....

Then client will try preferred first, and if it's not provided, 
then will go through the bitmask and pick first working.

Shall I do that? Trond, any comments?

Tigran.

> that's what I did for my unsubmitted patches to submit multiple
> layouttypes in the Linux server..
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d6d5d2a..ec8e7db 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -787,7 +787,7 @@  int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	}
 
 	fsinfo.fattr = fattr;
-	fsinfo.layouttype = 0;
+	memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
 	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
 	if (error < 0)
 		goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e44412..ac8747b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4702,14 +4702,13 @@  static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
 }
 
 /*
- * Decode potentially multiple layout types. Currently we only support
- * one layout driver per file system.
+ * Decode potentially multiple layout types.
  */
-static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
+static int decode_pnfs_layout_types(struct xdr_stream *xdr,
 					 uint32_t *layouttype)
 {
 	__be32 *p;
-	int num;
+	uint32_t num, i;
 
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
@@ -4718,18 +4717,17 @@  static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
 
 	/* pNFS is not supported by the underlying file system */
 	if (num == 0) {
-		*layouttype = 0;
 		return 0;
 	}
-	if (num > 1)
-		printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout "
-			"drivers per filesystem not supported\n", __func__);
+	if (num > NFS_MAX_LAYOUT_TYPES)
+		printk(KERN_INFO "NFS: %s: Warning: Too many (%d) pNFS layout types\n", __func__, num);
 
 	/* Decode and set first layout type, move xdr->p past unused types */
 	p = xdr_inline_decode(xdr, num * 4);
 	if (unlikely(!p))
 		goto out_overflow;
-	*layouttype = be32_to_cpup(p);
+	for(i = 0; i < num && i < NFS_MAX_LAYOUT_TYPES; i++)
+		layouttype[i] = be32_to_cpup(p++);
 	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
@@ -4749,10 +4747,9 @@  static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
 	if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
 		return -EIO;
 	if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
-		status = decode_first_pnfs_layout_type(xdr, layouttype);
+		status = decode_pnfs_layout_types(xdr, layouttype);
 		bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
-	} else
-		*layouttype = 0;
+	}
 	return status;
 }
 
@@ -4833,7 +4830,7 @@  static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 	status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
 	if (status != 0)
 		goto xdr_error;
-	status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
+	status = decode_attr_pnfstype(xdr, bitmap, fsinfo->layouttype);
 	if (status != 0)
 		goto xdr_error;
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2fa483e..6b3b67a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -102,32 +102,51 @@  unset_pnfs_layoutdriver(struct nfs_server *nfss)
  * Try to set the server's pnfs module to the pnfs layout type specified by id.
  * Currently only one pNFS layout driver per filesystem is supported.
  *
- * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
+ * @ids array of layout types supported by MDS.
  */
 void
 set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
-		      u32 id)
+		      u32 *ids)
 {
 	struct pnfs_layoutdriver_type *ld_type = NULL;
+	u32 id;
+	int i;
 
-	if (id == 0)
-		goto out_no_driver;
 	if (!(server->nfs_client->cl_exchange_flags &
 		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
-		printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n",
-			__func__, id, server->nfs_client->cl_exchange_flags);
+		printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
+			__func__, server->nfs_client->cl_exchange_flags);
 		goto out_no_driver;
 	}
-	ld_type = find_pnfs_driver(id);
-	if (!ld_type) {
+	/*
+	 * The server may support more than one layout type.
+	 * Assuming that the server puts the 'common default' as the first
+	 * entry, try all following entries first and fall back to the default
+	 * if no matching driver was found.
+	 */
+	for(i = 1; i < NFS_MAX_LAYOUT_TYPES && ids[i] != 0; i++) {
+		id = ids[i];
 		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
 		ld_type = find_pnfs_driver(id);
-		if (!ld_type) {
-			dprintk("%s: No pNFS module found for %u.\n",
-				__func__, id);
-			goto out_no_driver;
-		}
+		if(ld_type)
+			goto found_module;
+
+		dprintk("%s: No pNFS module found for %u.\n", __func__, id);
+	}
+
+	/*
+	 * no other layout types found. Try default one.
+	 */
+	id = ids[0];
+	request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
+	ld_type = find_pnfs_driver(id);
+
+	if (!ld_type) {
+		dprintk("%s: No pNFS module found for %u.\n", __func__, id);
+		goto out_no_driver;
 	}
+
+found_module:
 	server->pnfs_curr_ld = ld_type;
 	if (ld_type->set_layoutdriver
 	    && ld_type->set_layoutdriver(server, mntfh)) {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1ac1db5..bfb5f3f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -234,7 +234,7 @@  void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
 
-void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
+void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32 *);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
 int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
@@ -655,7 +655,7 @@  pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
 }
 
 static inline void set_pnfs_layoutdriver(struct nfs_server *s,
-					 const struct nfs_fh *mntfh, u32 id)
+					 const struct nfs_fh *mntfh, u32 *ids)
 {
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d320906..e45bcb9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -125,6 +125,12 @@  struct nfs_fattr {
 		| NFS_ATTR_FATTR_V4_SECURITY_LABEL)
 
 /*
+ * Maximal number of supported layout drivers.
+ */
+#define NFS_MAX_LAYOUT_TYPES 8
+
+
+/*
  * Info on the file system
  */
 struct nfs_fsinfo {
@@ -139,7 +145,7 @@  struct nfs_fsinfo {
 	__u64			maxfilesize;
 	struct timespec		time_delta; /* server time granularity */
 	__u32			lease_time; /* in seconds */
-	__u32			layouttype; /* supported pnfs layout driver */
+	__u32			layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
 	__u32			blksize; /* preferred pnfs io block size */
 	__u32			clone_blksize; /* granularity of a CLONE operation */
 };