diff mbox

[RFC,v2,6/8] VFS:userns: shift UID/GID to on-disk view before any write to disk

Message ID 1462372014-3786-7-git-send-email-tixxdz@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Djalal Harouni May 4, 2016, 2:26 p.m. UTC
If both the mount namespace and the mount point support UID/GID shifts,
then during inode creation or during a chown call on an inode, make sure
that kuid and kgid that will be used to set inode->{i_uid|i_gid} are in
on-disk view.

Perform the shift to on-disk view during inode initialization or during
notify_change() calls. Usually in this case inode's uid/gid will contain
a kuid and kgid that are valid in the context of the caller and its view
inside the global init_user_ns user namespace. They will always end up
either with current_fsuid() value or the attr->ia_uid of the struct iattr.

inode->{i_uid|i_gid} on-disk writes inside user_ns_X
----------------------------------------------------

Without this patch:
------------------------------------------------------------
user_ns_X uid   | init_user_ns uid    | inode->i_uid on-disk
------------------------------------------------------------
0               | 1000000             | 1000000
------------------------------------------------------------
999             | 1000999             | 1000999
------------------------------------------------------------
1000            | 1001000             | 1001000
------------------------------------------------------------

inode->{i_uid|i_gid} always ends up with the global kuid/kgid of the
caller in the init_user_ns.

With this patch:
------------------------------------------------------------
user_ns_X uid   | init_user_ns uid    | inode->i_uid on-disk
------------------------------------------------------------
0               | 1000000             | 0
------------------------------------------------------------
999             | 1000999             | 999
------------------------------------------------------------
1000            | 1001000             | 1000
------------------------------------------------------------

inode->{i_uid|i_gid} will have the values of the uid_t and gid_t that
are shown inside the user namespace of the caller.

Of course this works only on mounts that support VFS UID/GID shift and
are inside a mount namespace that also supports the above. The shift to
the on-disk view is done inside notify_change() to give
inode_change_ok() a chance to catch permission errors. At the same time
we adapt inode_change_ok() and make the necessary translation, when it's
needed, from virtual to on-disk and vice versa.

The approach is to always keep on-disk values in inode->{i_uid|i_gid},
even in memory. The virtual translation is only done when needed for
permission checks or stat() calls.

Signed-off-by: Dongsu Park <dongsu@endocode.com>
Signed-off-by: Djalal Harouni <tixxdz@opendz.org>
---
 fs/attr.c  | 44 +++++++++++++++++++++++++++++++++-----------
 fs/inode.c |  4 ++--
 2 files changed, 35 insertions(+), 13 deletions(-)
diff mbox

Patch

diff --git a/fs/attr.c b/fs/attr.c
index 25b24d0..c476257 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -47,26 +47,38 @@  int inode_change_ok(const struct inode *inode, struct iattr *attr)
 		return 0;
 
 	/* Make sure a caller can chown. */
-	if ((ia_valid & ATTR_UID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
-		return -EPERM;
+	if (ia_valid & ATTR_UID) {
+		/* Shift to virtual if necessary */
+		kuid_t i_uid = vfs_shift_i_uid_to_virtual(inode);
+
+		if ((!uid_eq(current_fsuid(), i_uid) ||
+		     !uid_eq(attr->ia_uid, inode->i_uid)) &&
+		    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+			return -EPERM;
+	}
 
 	/* Make sure caller can chgrp. */
-	if ((ia_valid & ATTR_GID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+	if (ia_valid & ATTR_GID) {
+		/* Shift to virtual if necessary */
+		kuid_t i_uid = vfs_shift_i_uid_to_virtual(inode);
+		/* Shift it back to virtual if necessary */
+		kgid_t ia_gid = vfs_kgid_disk_to_virtual(inode, attr->ia_gid);
+
+		if ((!uid_eq(current_fsuid(), i_uid) ||
+		     (!in_group_p(ia_gid) &&
+		      !gid_eq(attr->ia_gid, inode->i_gid))) &&
+		    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
+	}
 
 	/* Make sure a caller can chmod. */
 	if (ia_valid & ATTR_MODE) {
 		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 		/* Also check the setgid bit! */
-		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
-				inode->i_gid) &&
+		if (!in_group_p((ia_valid & ATTR_GID) ?
+				vfs_kgid_disk_to_virtual(inode, attr->ia_gid) :
+				vfs_shift_i_gid_to_virtual(inode)) &&
 		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			attr->ia_mode &= ~S_ISGID;
 	}
@@ -209,6 +221,16 @@  int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
 			inode->i_flags &= ~S_NOSEC;
 	}
 
+	/*
+	 * Shift if necessary the UID and GID that are meant to be written
+	 * into the inode's uid/gid to on-disk view. Do that as early as
+	 * possible.
+	 */
+	if ((ia_valid & ATTR_UID))
+		attr->ia_uid = vfs_shift_kuid_to_disk(inode, attr->ia_uid);
+	if ((ia_valid & ATTR_GID))
+		attr->ia_gid = vfs_shift_kgid_to_disk(inode, attr->ia_gid);
+
 	now = current_fs_time(inode->i_sb);
 
 	attr->ia_ctime = now;
diff --git a/fs/inode.c b/fs/inode.c
index 07daf5f..e6ee56a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1940,13 +1940,13 @@  EXPORT_SYMBOL(init_special_inode);
 void inode_init_owner(struct inode *inode, const struct inode *dir,
 			umode_t mode)
 {
-	inode->i_uid = current_fsuid();
+	inode->i_uid = vfs_shift_kuid_to_disk(inode, current_fsuid());
 	if (dir && dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
 	} else
-		inode->i_gid = current_fsgid();
+		inode->i_gid = vfs_shift_kgid_to_disk(inode, current_fsgid());
 	inode->i_mode = mode;
 }
 EXPORT_SYMBOL(inode_init_owner);