diff mbox series

[v2,02/30] fuse: Clear setuid bit even in cache=never path

Message ID 20190515192715.18000-3-vgoyal@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtio-fs: shared file system for virtual machines | expand

Commit Message

Vivek Goyal May 15, 2019, 7:26 p.m. UTC
If fuse daemon is started with cache=never, fuse falls back to direct IO.
In that write path we don't call file_remove_privs() and that means setuid
bit is not cleared if unpriviliged user writes to a file with setuid bit set.

pjdfstest chmod test 12.t tests this and fails.

Fix this by calling fuse_remove_privs() even for direct I/O path.

I tested this as follows.

- Run fuse example pasthrough fs.

  $ passthrough_ll /mnt/pasthrough-mnt -o default_permissions,allow_other,cache=never
  $ mkdir /mnt/pasthrough-mnt/testdir
  $ cd /mnt/pasthrough-mnt/testdir
  $ prove -rv pjdfstests/tests/chmod/12.t

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/file.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

Comments

Miklos Szeredi May 20, 2019, 2:41 p.m. UTC | #1
On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote:
> If fuse daemon is started with cache=never, fuse falls back to direct IO.
> In that write path we don't call file_remove_privs() and that means setuid
> bit is not cleared if unpriviliged user writes to a file with setuid bit set.
> 
> pjdfstest chmod test 12.t tests this and fails.

I think better sulution is to tell the server if the suid bit needs to be
removed, so it can do so in a race free way.

Here's the kernel patch, and I'll reply with the libfuse patch.

---
 fs/fuse2/file.c           |    2 ++
 include/uapi/linux/fuse.h |    3 +++
 2 files changed, 5 insertions(+)

--- a/fs/fuse2/file.c
+++ b/fs/fuse2/file.c
@@ -363,6 +363,8 @@ static ssize_t fuse_send_write(struct fu
 		inarg->flags |= O_DSYNC;
 	if (iocb->ki_flags & IOCB_SYNC)
 		inarg->flags |= O_SYNC;
+	if (!capable(CAP_FSETID))
+		inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
 	req->inh.opcode = FUSE_WRITE;
 	req->inh.nodeid = ff->nodeid;
 	req->inh.len = req->inline_inlen + count;
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -125,6 +125,7 @@
  *
  *  7.29
  *  - add FUSE_NO_OPENDIR_SUPPORT flag
+ *  - add FUSE_WRITE_KILL_PRIV flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -318,9 +319,11 @@ struct fuse_file_lock {
  *
  * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
  * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
  */
 #define FUSE_WRITE_CACHE	(1 << 0)
 #define FUSE_WRITE_LOCKOWNER	(1 << 1)
+#define FUSE_WRITE_KILL_PRIV	(1 << 2)
 
 /**
  * Read flags
Miklos Szeredi May 20, 2019, 2:44 p.m. UTC | #2
On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote:
> On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote:
> > If fuse daemon is started with cache=never, fuse falls back to direct IO.
> > In that write path we don't call file_remove_privs() and that means setuid
> > bit is not cleared if unpriviliged user writes to a file with setuid bit set.
> > 
> > pjdfstest chmod test 12.t tests this and fails.
> 
> I think better sulution is to tell the server if the suid bit needs to be
> removed, so it can do so in a race free way.
> 
> Here's the kernel patch, and I'll reply with the libfuse patch.

Here are the patches for libfuse and passthrough_ll.
---
 include/fuse_common.h |    5 ++++-
 include/fuse_kernel.h |    2 ++
 lib/fuse_lowlevel.c   |   12 ++++++++----
 3 files changed, 14 insertions(+), 5 deletions(-)

--- a/include/fuse_common.h
+++ b/include/fuse_common.h
@@ -64,8 +64,11 @@ struct fuse_file_info {
 	   May only be set in ->release(). */
 	unsigned int flock_release : 1;
 
+	/* Kill suid and sgid bits on write */
+	unsigned int write_kill_priv : 1;
+
 	/** Padding.  Do not use*/
-	unsigned int padding : 27;
+	unsigned int padding : 26;
 
 	/** File handle.  May be filled in by filesystem in open().
 	    Available in all other file operations */
--- a/include/fuse_kernel.h
+++ b/include/fuse_kernel.h
@@ -304,9 +304,11 @@ struct fuse_file_lock {
  *
  * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
  * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
+ * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
  */
 #define FUSE_WRITE_CACHE	(1 << 0)
 #define FUSE_WRITE_LOCKOWNER	(1 << 1)
+#define FUSE_WRITE_KILL_PRIV	(1 << 2)
 
 /**
  * Read flags
--- a/lib/fuse_lowlevel.c
+++ b/lib/fuse_lowlevel.c
@@ -1315,12 +1315,14 @@ static void do_write(fuse_req_t req, fus
 
 	memset(&fi, 0, sizeof(fi));
 	fi.fh = arg->fh;
-	fi.writepage = (arg->write_flags & 1) != 0;
+	fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
+	fi.write_kill_priv = (arg->write_flags & FUSE_WRITE_KILL_PRIV) != 0;
 
 	if (req->se->conn.proto_minor < 9) {
 		param = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE;
 	} else {
-		fi.lock_owner = arg->lock_owner;
+		if (arg->write_flags & FUSE_WRITE_LOCKOWNER)
+			fi.lock_owner = arg->lock_owner;
 		fi.flags = arg->flags;
 		param = PARAM(arg);
 	}
@@ -1345,7 +1347,8 @@ static void do_write_buf(fuse_req_t req,
 
 	memset(&fi, 0, sizeof(fi));
 	fi.fh = arg->fh;
-	fi.writepage = arg->write_flags & 1;
+	fi.writepage = (arg->write_flags & FUSE_WRITE_CACHE) != 0;
+	fi.write_kill_priv = (arg->write_flags & FUSE_WRITE_KILL_PRIV) != 0;
 
 	if (se->conn.proto_minor < 9) {
 		bufv.buf[0].mem = ((char *) arg) + FUSE_COMPAT_WRITE_IN_SIZE;
@@ -1353,7 +1356,8 @@ static void do_write_buf(fuse_req_t req,
 			FUSE_COMPAT_WRITE_IN_SIZE;
 		assert(!(bufv.buf[0].flags & FUSE_BUF_IS_FD));
 	} else {
-		fi.lock_owner = arg->lock_owner;
+		if (arg->write_flags & FUSE_WRITE_LOCKOWNER)
+			fi.lock_owner = arg->lock_owner;
 		fi.flags = arg->flags;
 		if (!(bufv.buf[0].flags & FUSE_BUF_IS_FD))
 			bufv.buf[0].mem = PARAM(arg);
---
 example/passthrough_ll.c |   29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

--- a/example/passthrough_ll.c
+++ b/example/passthrough_ll.c
@@ -56,6 +56,7 @@
 #include <sys/file.h>
 #include <sys/xattr.h>
 #include <sys/syscall.h>
+#include <sys/capability.h>
 
 /* We are re-using pointers to our `struct lo_inode` and `struct
    lo_dirp` elements as inodes. This means that we must be able to
@@ -965,6 +966,11 @@ static void lo_write_buf(fuse_req_t req,
 	(void) ino;
 	ssize_t res;
 	struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
+	struct __user_cap_header_struct cap_hdr = {
+		.version = _LINUX_CAPABILITY_VERSION_1,
+	};
+	struct __user_cap_data_struct cap_orig;
+	struct __user_cap_data_struct cap_new;
 
 	out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
 	out_buf.buf[0].fd = fi->fh;
@@ -974,7 +980,28 @@ static void lo_write_buf(fuse_req_t req,
 		fprintf(stderr, "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n",
 			ino, out_buf.buf[0].size, (unsigned long) off);
 
+	if (fi->write_kill_priv) {
+		res = capget(&cap_hdr, &cap_orig);
+		if (res == -1) {
+			fuse_reply_err(req, errno);
+			return;
+		}
+		cap_new = cap_orig;
+		cap_new.effective &= ~(1 << CAP_FSETID);
+		res = capset(&cap_hdr, &cap_new);
+		if (res == -1) {
+			fuse_reply_err(req, errno);
+			return;
+		}
+	}
+
 	res = fuse_buf_copy(&out_buf, in_buf, 0);
+
+	if (fi->write_kill_priv) {
+		if (capset(&cap_hdr, &cap_orig) != 0)
+			abort();
+	}
+
 	if(res < 0)
 		fuse_reply_err(req, -res);
 	else
@@ -1215,7 +1242,7 @@ static void lo_copy_file_range(fuse_req_
 	res = copy_file_range(fi_in->fh, &off_in, fi_out->fh, &off_out, len,
 			      flags);
 	if (res < 0)
-		fuse_reply_err(req, -errno);
+		fuse_reply_err(req, errno);
 	else
 		fuse_reply_write(req, res);
 }
Nikolaus Rath May 20, 2019, 8:25 p.m. UTC | #3
On May 20 2019, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote:
>> On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote:
>> > If fuse daemon is started with cache=never, fuse falls back to direct IO.
>> > In that write path we don't call file_remove_privs() and that means setuid
>> > bit is not cleared if unpriviliged user writes to a file with setuid bit set.
>> > 
>> > pjdfstest chmod test 12.t tests this and fails.
>> 
>> I think better sulution is to tell the server if the suid bit needs to be
>> removed, so it can do so in a race free way.
>> 
>> Here's the kernel patch, and I'll reply with the libfuse patch.
>
> Here are the patches for libfuse and passthrough_ll.

Could you also submit them as pull requests at https://github.com/libfuse/libfuse/pulls?

Best,
-Nikolaus
Vivek Goyal May 21, 2019, 3:01 p.m. UTC | #4
On Mon, May 20, 2019 at 04:41:37PM +0200, Miklos Szeredi wrote:
> On Wed, May 15, 2019 at 03:26:47PM -0400, Vivek Goyal wrote:
> > If fuse daemon is started with cache=never, fuse falls back to direct IO.
> > In that write path we don't call file_remove_privs() and that means setuid
> > bit is not cleared if unpriviliged user writes to a file with setuid bit set.
> > 
> > pjdfstest chmod test 12.t tests this and fails.
> 
> I think better sulution is to tell the server if the suid bit needs to be
> removed, so it can do so in a race free way.
> 
> Here's the kernel patch, and I'll reply with the libfuse patch.

Hi Miklos,

I tested and it works for me.

Vivek

> 
> ---
>  fs/fuse2/file.c           |    2 ++
>  include/uapi/linux/fuse.h |    3 +++
>  2 files changed, 5 insertions(+)
> 
> --- a/fs/fuse2/file.c
> +++ b/fs/fuse2/file.c
> @@ -363,6 +363,8 @@ static ssize_t fuse_send_write(struct fu
>  		inarg->flags |= O_DSYNC;
>  	if (iocb->ki_flags & IOCB_SYNC)
>  		inarg->flags |= O_SYNC;
> +	if (!capable(CAP_FSETID))
> +		inarg->write_flags |= FUSE_WRITE_KILL_PRIV;
>  	req->inh.opcode = FUSE_WRITE;
>  	req->inh.nodeid = ff->nodeid;
>  	req->inh.len = req->inline_inlen + count;
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -125,6 +125,7 @@
>   *
>   *  7.29
>   *  - add FUSE_NO_OPENDIR_SUPPORT flag
> + *  - add FUSE_WRITE_KILL_PRIV flag
>   */
>  
>  #ifndef _LINUX_FUSE_H
> @@ -318,9 +319,11 @@ struct fuse_file_lock {
>   *
>   * FUSE_WRITE_CACHE: delayed write from page cache, file handle is guessed
>   * FUSE_WRITE_LOCKOWNER: lock_owner field is valid
> + * FUSE_WRITE_KILL_PRIV: kill suid and sgid bits
>   */
>  #define FUSE_WRITE_CACHE	(1 << 0)
>  #define FUSE_WRITE_LOCKOWNER	(1 << 1)
> +#define FUSE_WRITE_KILL_PRIV	(1 << 2)
>  
>  /**
>   * Read flags
> 
>
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06096b60f1df..5baf07fd2876 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1456,14 +1456,18 @@  static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	/* Don't allow parallel writes to the same file */
 	inode_lock(inode);
 	res = generic_write_checks(iocb, from);
-	if (res > 0) {
-		if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
-			res = fuse_direct_IO(iocb, from);
-		} else {
-			res = fuse_direct_io(&io, from, &iocb->ki_pos,
-					     FUSE_DIO_WRITE);
-		}
+	if (res <= 0)
+		goto out;
+
+	res = file_remove_privs(iocb->ki_filp);
+	if (res)
+		goto out;
+	if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
+		res = fuse_direct_IO(iocb, from);
+	} else {
+		res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
 	}
+out:
 	fuse_invalidate_attr(inode);
 	if (res > 0)
 		fuse_write_update_size(inode, iocb->ki_pos);