[v3,3/3] NFSv4: Refactor NFS to use user namespaces

Sargun Dhillon Oct. 21, 2020
Work has been done in several patches to enable NFSv4 to use user
namespaces:
  58002399da65: NFSv4: Convert the NFS client idmapper to use the container user namespace
  3b7eb5e35d0f: NFS: When mounting, don't share filesystems between different user namespaces

Unfortunately, the userspace APIs only allowed the userspace-facing side of the
filesystem (the superblock's s_user_ns) to be set to a non-init user namespace. This change
furthers the fs_context-related refactoring and piggybacks on top of that logic, so that the
superblock user namespace and the NFS user namespace are the same.

This change only allows those users who are not using ID mapping to use user namespaces
because the upcall mechanism still needs to be made fully namespace aware. Currently,
it is only network namespace aware (and this patch doesn't impede that behaviour).
Also, there is currently a limitation that enabling / disabling ID mapping can only
be done on a machine-wide basis.

Eventually, we will need to at least:
  * Separate out the keyring cache by namespace
  * Come up with an upcall mechanism that can be triggered inside of the container,
    or safely triggered outside, with the requisite context to do the right mapping.
  * Handle whatever refactoring needs to be done in net/sunrpc.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index daacc78a3d48..0811e9540bf5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1151,7 +1151,19 @@  struct nfs_server *nfs4_create_server(struct fs_context *fc)
 	if (!server)
 		return ERR_PTR(-ENOMEM);
-	server->cred = get_cred(current_cred());
+	/*
+	 * current_cred() must have CAP_SYS_ADMIN in init_user_ns. All non
+	 * init user namespaces cannot mount NFS, but the fs_context
+	 * can be created in any user namespace.
+	 */
+	if (fc->cred->user_ns != &init_user_ns) {
+		dprintk("%s: Using creds from non-init userns\n", __func__);
+	} else if (fc->cred != current_cred()) {
+		dprintk("%s: Using creds from fs_context which are different than current_creds\n",
+			__func__);
+	}
+	server->cred = get_cred(fc->cred);
 	auth_probe = ctx->auth_info.flavor_len < 1;
@@ -1164,6 +1176,19 @@  struct nfs_server *nfs4_create_server(struct fs_context *fc)
 	if (error < 0)
 		goto error;
+	/*
+	 * nfs4idmap is not fully isolated by user namespaces. It is currently
+	 * only network namespace aware. If upcalls never happen, we do not
+	 * need to worry as nfs_client instances aren't shared between
+	 * user namespaces.
+	 */
+	if (idmap_userns(server->nfs_client->cl_idmap) != &init_user_ns &&
+		!(server->caps & NFS_CAP_UIDGID_NOMAP)) {
+		error = -EINVAL;
+		errorf(fc, "Mount credentials are from non init user namespace and ID mapping is enabled. This is not allowed.");
+		goto error;
+	}
 	return server;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 8d8aba305ecc..33dc9b76dc17 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -73,7 +73,7 @@  struct idmap {
 	struct user_namespace	*user_ns;
-static struct user_namespace *idmap_userns(const struct idmap *idmap)
+struct user_namespace *idmap_userns(const struct idmap *idmap)
 	if (idmap && idmap->user_ns)
 		return idmap->user_ns;
diff --git a/fs/nfs/nfs4idmap.h b/fs/nfs/nfs4idmap.h
index de44d7330ab3..2f5296497887 100644
--- a/fs/nfs/nfs4idmap.h
+++ b/fs/nfs/nfs4idmap.h
@@ -38,7 +38,7 @@ 
 #include <linux/uidgid.h>
 #include <uapi/linux/nfs_idmap.h>
+#include <linux/user_namespace.h>
 /* Forward declaration to make this header independent of others */
 struct nfs_client;
@@ -50,6 +50,7 @@  int nfs_idmap_init(void);
 void nfs_idmap_quit(void);
 int nfs_idmap_new(struct nfs_client *);
 void nfs_idmap_delete(struct nfs_client *);
+struct user_namespace *idmap_userns(const struct idmap *idmap);
 void nfs_fattr_init_names(struct nfs_fattr *fattr,
 		struct nfs4_string *owner_name,