diff mbox series

[v3] fs: add a new SB_I_NOUMASK flag

Message ID 20230911-acl-fix-v3-1-b25315333f6c@kernel.org (mailing list archive)
State New, archived
Headers show
Series [v3] fs: add a new SB_I_NOUMASK flag | expand

Commit Message

Jeff Layton Sept. 12, 2023, 12:25 a.m. UTC
SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4
also sets this flag to prevent the VFS from applying the umask on
newly-created files. NFSv4 doesn't support POSIX ACLs however, which
causes confusion when other subsystems try to test for them.

Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask
stripping without advertising support for POSIX ACLs. Set the new flag
on NFSv4 instead of SB_POSIXACL.

Also, move mode_strip_umask to namei.h and convert init_mknod and
init_mkdir to use it.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Yet another approach to fixing this issue. I think this way is probably
the best, since makes the purpose of these flags clearer, and stops NFS
from relying on SB_POSIXACL to avoid umask stripping.
---
Changes in v3:
- move new flag to s_iflags
- convert init_mkdir and init_mknod to use mode_strip_umask
- Link to v2: https://lore.kernel.org/r/20230910-acl-fix-v2-1-38d6caa81419@kernel.org

Changes in v2:
- new approach: add a new SB_NOUMASK flag that NFSv4 can use instead of
  SB_POSIXACL
- Link to v1: https://lore.kernel.org/r/20230908-acl-fix-v1-1-1e6b76c8dcc8@kernel.org
---
 fs/init.c             |  6 ++----
 fs/namei.c            | 19 -------------------
 fs/nfs/super.c        |  2 +-
 include/linux/fs.h    |  3 ++-
 include/linux/namei.h | 24 ++++++++++++++++++++++++
 5 files changed, 29 insertions(+), 25 deletions(-)


---
base-commit: a48fa7efaf1161c1c898931fe4c7f0070964233a
change-id: 20230908-acl-fix-6f8f86930f32

Best regards,

Comments

Christian Brauner Sept. 12, 2023, 12:18 p.m. UTC | #1
On Mon, 11 Sep 2023 20:25:50 -0400, Jeff Layton wrote:
> SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4
> also sets this flag to prevent the VFS from applying the umask on
> newly-created files. NFSv4 doesn't support POSIX ACLs however, which
> causes confusion when other subsystems try to test for them.
> 
> Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask
> stripping without advertising support for POSIX ACLs. Set the new flag
> on NFSv4 instead of SB_POSIXACL.
> 
> [...]

Fine by me and removes hacking around this by abusing the POSIX ACL api.

---

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/1] fs: add a new SB_I_NOUMASK flag
      https://git.kernel.org/vfs/vfs/c/34618fcb9fae
diff mbox series

Patch

diff --git a/fs/init.c b/fs/init.c
index 9684406a8416..e9387b6c4f30 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -153,8 +153,7 @@  int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	if (!IS_POSIXACL(path.dentry->d_inode))
-		mode &= ~current_umask();
+	mode = mode_strip_umask(d_inode(path.dentry), mode);
 	error = security_path_mknod(&path, dentry, mode, dev);
 	if (!error)
 		error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode,
@@ -229,8 +228,7 @@  int __init init_mkdir(const char *pathname, umode_t mode)
 	dentry = kern_path_create(AT_FDCWD, pathname, &path, LOOKUP_DIRECTORY);
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
-	if (!IS_POSIXACL(path.dentry->d_inode))
-		mode &= ~current_umask();
+	mode = mode_strip_umask(d_inode(path.dentry), mode);
 	error = security_path_mkdir(&path, dentry, mode);
 	if (!error)
 		error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
diff --git a/fs/namei.c b/fs/namei.c
index 567ee547492b..94b27370f468 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3103,25 +3103,6 @@  void unlock_rename(struct dentry *p1, struct dentry *p2)
 }
 EXPORT_SYMBOL(unlock_rename);
 
-/**
- * mode_strip_umask - handle vfs umask stripping
- * @dir:	parent directory of the new inode
- * @mode:	mode of the new inode to be created in @dir
- *
- * Umask stripping depends on whether or not the filesystem supports POSIX
- * ACLs. If the filesystem doesn't support it umask stripping is done directly
- * in here. If the filesystem does support POSIX ACLs umask stripping is
- * deferred until the filesystem calls posix_acl_create().
- *
- * Returns: mode
- */
-static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
-{
-	if (!IS_POSIXACL(dir))
-		mode &= ~current_umask();
-	return mode;
-}
-
 /**
  * vfs_prepare_mode - prepare the mode to be used for a new inode
  * @idmap:	idmap of the mount the inode was found from
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0d6473cb00cb..9b1cfca8112a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1071,7 +1071,7 @@  static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
 		sb->s_export_op = &nfs_export_ops;
 		break;
 	case 4:
-		sb->s_flags |= SB_POSIXACL;
+		sb->s_iflags |= SB_I_NOUMASK;
 		sb->s_time_gran = 1;
 		sb->s_time_min = S64_MIN;
 		sb->s_time_max = S64_MAX;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4aeb3fa11927..58dea591a341 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1119,7 +1119,7 @@  extern int send_sigurg(struct fown_struct *fown);
 #define SB_NOATIME      BIT(10)	/* Do not update access times. */
 #define SB_NODIRATIME   BIT(11)	/* Do not update directory access times */
 #define SB_SILENT       BIT(15)
-#define SB_POSIXACL     BIT(16)	/* VFS does not apply the umask */
+#define SB_POSIXACL     BIT(16)	/* Supports POSIX ACLs */
 #define SB_INLINECRYPT  BIT(17)	/* Use blk-crypto for encrypted files */
 #define SB_KERNMOUNT    BIT(22)	/* this is a kern_mount call */
 #define SB_I_VERSION    BIT(23)	/* Update inode I_version field */
@@ -1166,6 +1166,7 @@  extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
 #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
 #define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
+#define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */
 
 /* Possible states of 'frozen' field */
 enum {
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1463cbda4888..e3619920f9f0 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -92,6 +92,30 @@  extern struct dentry *lock_rename(struct dentry *, struct dentry *);
 extern struct dentry *lock_rename_child(struct dentry *, struct dentry *);
 extern void unlock_rename(struct dentry *, struct dentry *);
 
+/**
+ * mode_strip_umask - handle vfs umask stripping
+ * @dir:	parent directory of the new inode
+ * @mode:	mode of the new inode to be created in @dir
+ *
+ * In most filesystems, umask stripping depends on whether or not the
+ * filesystem supports POSIX ACLs. If the filesystem doesn't support it umask
+ * stripping is done directly in here. If the filesystem does support POSIX
+ * ACLs umask stripping is deferred until the filesystem calls
+ * posix_acl_create().
+ *
+ * Some filesystems (like NFSv4) also want to avoid umask stripping by the
+ * VFS, but don't support POSIX ACLs. Those filesystems can set SB_I_NOUMASK
+ * to get this effect without declaring that they support POSIX ACLs.
+ *
+ * Returns: mode
+ */
+static inline umode_t __must_check mode_strip_umask(const struct inode *dir, umode_t mode)
+{
+	if (!IS_POSIXACL(dir) && !(dir->i_sb->s_iflags & SB_I_NOUMASK))
+		mode &= ~current_umask();
+	return mode;
+}
+
 extern int __must_check nd_jump_link(const struct path *path);
 
 static inline void nd_terminate_link(void *name, size_t len, size_t maxlen)