@@ -357,6 +357,21 @@ struct fuse_file_info {
*/
#define FUSE_CAP_SUBMOUNTS (1 << 27)
+/**
+ * Indicates that the filesystem is responsible for clearing the
+ * security.capability xattr and the setuid and setgid bits. The rules
+ * are as follows:
+ * - clear "security.capability" on write, truncate and chown unconditionally
+ * - clear suid/sgid if any of the following is true. Note, sgid is cleared
+ * only if the group executable bit is set.
+ * o setattr has FATTR_SIZE and FATTR_KILL_SUIDGID set
+ * o setattr has FATTR_UID or FATTR_GID set
+ * o open has O_TRUNC and FUSE_OPEN_KILL_SUIDGID set
+ * o create has O_TRUNC and FUSE_OPEN_KILL_SUIDGID set
+ * o write has FUSE_WRITE_KILL_SUIDGID set
+ */
+#define FUSE_CAP_HANDLE_KILLPRIV_V2 (1 << 28)
+
/**
* Ioctl flags
*
@@ -855,7 +855,7 @@ static void do_setattr(fuse_req_t req, fuse_ino_t nodeid,
FUSE_SET_ATTR_GID | FUSE_SET_ATTR_SIZE |
FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME |
FUSE_SET_ATTR_ATIME_NOW | FUSE_SET_ATTR_MTIME_NOW |
- FUSE_SET_ATTR_CTIME;
+ FUSE_SET_ATTR_CTIME | FUSE_SET_ATTR_KILL_SUIDGID;
req->se->op.setattr(req, nodeid, &stbuf, arg->valid, fi);
} else {
@@ -1069,6 +1069,7 @@ static void do_create(fuse_req_t req, fuse_ino_t nodeid,
memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags;
+ fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
req->ctx.umask = arg->umask;
@@ -1092,6 +1093,7 @@ static void do_open(fuse_req_t req, fuse_ino_t nodeid,
memset(&fi, 0, sizeof(fi));
fi.flags = arg->flags;
+ fi.kill_priv = arg->open_flags & FUSE_OPEN_KILL_SUIDGID;
if (req->se->op.open) {
req->se->op.open(req, nodeid, &fi);
@@ -1983,6 +1985,9 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
if (arg->flags & FUSE_SUBMOUNTS) {
se->conn.capable |= FUSE_CAP_SUBMOUNTS;
}
+ if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
+ se->conn.capable |= FUSE_CAP_HANDLE_KILLPRIV_V2;
+ }
#ifdef HAVE_SPLICE
#ifdef HAVE_VMSPLICE
se->conn.capable |= FUSE_CAP_SPLICE_WRITE | FUSE_CAP_SPLICE_MOVE;
@@ -2114,6 +2119,10 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
outarg.congestion_threshold = se->conn.congestion_threshold;
outarg.time_gran = se->conn.time_gran;
+ if (se->conn.want & FUSE_CAP_HANDLE_KILLPRIV_V2) {
+ outarg.flags |= FUSE_HANDLE_KILLPRIV_V2;
+ }
+
fuse_log(FUSE_LOG_DEBUG, " INIT: %u.%u\n", outarg.major, outarg.minor);
fuse_log(FUSE_LOG_DEBUG, " flags=0x%08x\n", outarg.flags);
fuse_log(FUSE_LOG_DEBUG, " max_readahead=0x%08x\n", outarg.max_readahead);
@@ -143,6 +143,7 @@ struct fuse_forget_data {
#define FUSE_SET_ATTR_ATIME_NOW (1 << 7)
#define FUSE_SET_ATTR_MTIME_NOW (1 << 8)
#define FUSE_SET_ATTR_CTIME (1 << 10)
+#define FUSE_SET_ATTR_KILL_SUIDGID (1 << 11)
/*
* Request methods and replies
@@ -168,6 +168,7 @@ struct lo_data {
/* An O_PATH file descriptor to /proc/self/fd/ */
int proc_self_fd;
+ int user_killpriv_v2, killpriv_v2;
};
static const struct fuse_opt lo_opts[] = {
@@ -198,6 +199,8 @@ static const struct fuse_opt lo_opts[] = {
{ "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
{ "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
{ "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
+ { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
+ { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
FUSE_OPT_END
};
static bool use_syslog = false;
@@ -630,6 +633,34 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
"does not support it\n");
lo->announce_submounts = false;
}
+
+ if (lo->user_killpriv_v2 == 1) {
+ /*
+ * The user explicitly asked for this option, so enable it
+ * unconditionally. If the connection does not offer this capability,
+ * init should fail in fuse_lowlevel.c.
+ */
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
+ conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
+ lo->killpriv_v2 = 1;
+ } else if (lo->user_killpriv_v2 == -1 &&
+ conn->capable & FUSE_CAP_HANDLE_KILLPRIV_V2) {
+ /*
+ * The user did not specify a value for killpriv_v2. By default,
+ * enable it if the connection offers this capability.
+ */
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
+ conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
+ lo->killpriv_v2 = 1;
+ } else {
+ /*
+ * Either the user asked to disable killpriv_v2, or the connection
+ * does not offer this capability. Disable killpriv_v2 in both cases.
+ */
+ fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
+ conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
+ lo->killpriv_v2 = 0;
+ }
}
static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
@@ -714,7 +745,10 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
}
if (valid & FUSE_SET_ATTR_SIZE) {
int truncfd;
+ bool kill_suidgid;
+ bool cap_fsetid_dropped = false;
+ kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID);
if (fi) {
truncfd = fd;
} else {
@@ -725,8 +759,25 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
}
}
+ if (kill_suidgid) {
+ res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
+ if (res != 0) {
+ saverr = res;
+ if (!fi) {
+ close(truncfd);
+ }
+ goto out_err;
+ }
+ }
+
res = ftruncate(truncfd, attr->st_size);
saverr = res == -1 ? errno : 0;
+
+ if (cap_fsetid_dropped) {
+ if (gain_effective_cap("FSETID")) {
+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
+ }
+ }
if (!fi) {
close(truncfd);
}
@@ -1709,11 +1760,27 @@ static int lo_do_open(struct lo_data *lo, struct lo_inode *inode,
{
ssize_t fh;
int fd = existing_fd;
+ int err;
+ bool cap_fsetid_dropped = false;
+ bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv;
update_open_flags(lo->writeback, lo->allow_direct_io, fi);
if (fd < 0) {
+ if (kill_suidgid) {
+ err = drop_effective_cap("FSETID", &cap_fsetid_dropped);
+ if (err) {
+ return err;
+ }
+ }
+
fd = lo_inode_open(lo, inode, fi->flags);
+
+ if (cap_fsetid_dropped) {
+ if (gain_effective_cap("FSETID")) {
+ fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
+ }
+ }
if (fd < 0) {
return -fd;
}
@@ -1747,8 +1814,8 @@ static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
int err;
struct lo_cred old = {};
- fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent,
- name);
+ fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)"
+ " kill_priv=%d\n", parent, name, fi->kill_priv);
if (!is_safe_path_component(name)) {
fuse_reply_err(req, EINVAL);
@@ -1981,8 +2048,8 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
struct lo_inode *inode = lo_inode(req, ino);
int err;
- fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
- fi->flags);
+ fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)"
+ "\n", ino, fi->flags, fi->kill_priv);
if (!inode) {
fuse_reply_err(req, EBADF);
@@ -2121,12 +2188,14 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
out_buf.buf[0].pos = off;
fuse_log(FUSE_LOG_DEBUG,
- "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
- out_buf.buf[0].size, (unsigned long)off);
+ "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n",
+ ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv);
/*
* If kill_priv is set, drop CAP_FSETID which should lead to kernel
- * clearing setuid/setgid on file.
+ * clearing setuid/setgid on file. Note, for WRITE, we need to do
+ * this even if killpriv_v2 is not enabled. The FUSE direct write path
+ * relies on this.
*/
if (fi->kill_priv) {
res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
@@ -3534,6 +3603,7 @@ int main(int argc, char *argv[])
.posix_lock = 0,
.allow_direct_io = 0,
.proc_self_fd = -1,
+ .user_killpriv_v2 = -1,
};
struct lo_map_elem *root_elem;
struct lo_map_elem *reserve_elem;