@@ -406,3 +406,4 @@
392 i386 fspick sys_fspick __ia32_sys_fspick
393 i386 fsinfo sys_fsinfo __ia32_sys_fsinfo
394 i386 mount_notify sys_mount_notify __ia32_sys_mount_notify
+395 i386 sb_notify sys_sb_notify __ia32_sys_sb_notify
@@ -351,6 +351,7 @@
340 common fspick __x64_sys_fspick
341 common fsinfo __x64_sys_fsinfo
342 common mount_notify __x64_sys_mount_notify
+343 common sb_notify __x64_sys_sb_notify
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -116,6 +116,18 @@ config MOUNT_NOTIFICATIONS
device to handle the notification buffer and provides the
mount_notify() system call to enable/disable watchpoints.
+config SB_NOTIFICATIONS
+ bool "Superblock event notifications"
+ select WATCH_QUEUE
+ help
+ This option provides support for receiving superblock event
+ notifications. This makes use of the /dev/watch_queue misc device to
+ handle the notification buffer and provides the sb_notify() system
+ call to enable/disable watches.
+
+ Events can include things like changing between R/W and R/O, EIO
+ generation, ENOSPC generation and EDQUOT generation.
+
source "fs/quota/Kconfig"
source "fs/autofs/Kconfig"
@@ -37,6 +37,8 @@
#include <linux/user_namespace.h>
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
+#include <linux/syscalls.h>
+#include <linux/namei.h>
#include "internal.h"
static int thaw_super_locked(struct super_block *sb);
@@ -320,6 +322,10 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (s->s_watchers)
+ remove_watch_list(s->s_watchers);
+#endif
cleancache_invalidate_fs(s);
unregister_shrinker(&s->s_shrink);
fs->kill_sb(s);
@@ -997,6 +1003,8 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data,
/* Needs to be ordered wrt mnt_is_readonly() */
smp_wmb();
sb->s_readonly_remount = 0;
+ notify_sb(sb, notify_superblock_readonly,
+ remount_ro ? WATCH_INFO_FLAG_0 : 0);
/*
* Some filesystems modify their metadata via some other path than the
@@ -1810,3 +1818,111 @@ int vfs_get_tree(struct fs_context *fc)
return ret;
}
EXPORT_SYMBOL(vfs_get_tree);
+
+#ifdef CONFIG_SB_NOTIFICATIONS
+/*
+ * Post superblock notifications.
+ */
+void post_sb_notification(struct super_block *s, struct superblock_notification *n)
+{
+ post_watch_notification(s->s_watchers, &n->watch, s->s_watch_id);
+}
+
+static void release_sb_watch(struct watch_list *wlist, struct watch *watch)
+{
+ struct super_block *s = watch->private;
+
+ put_super(s);
+}
+
+/**
+ * sys_sb_notify - Watch for superblock events.
+ * @dfd: Base directory to pathwalk from or fd referring to superblock.
+ * @filename: Path to superblock to place the watch upon
+ * @at_flags: Pathwalk control flags
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ */
+SYSCALL_DEFINE5(sb_notify,
+ int, dfd,
+ const char __user *, filename,
+ unsigned int, at_flags,
+ int, watch_fd,
+ int, watch_id)
+{
+ struct watch_queue *wqueue;
+ struct super_block *s;
+ struct watch_list *wlist = NULL;
+ struct watch *watch;
+ struct path path;
+ int ret;
+
+ if (watch_id < -1 || watch_id > 0xff)
+ return -EINVAL;
+
+ ret = user_path_at(dfd, filename, at_flags, &path);
+ if (ret)
+ return ret;
+
+ wqueue = get_watch_queue(watch_fd);
+ if (IS_ERR(wqueue))
+ goto err_path;
+
+ s = path.dentry->d_sb;
+ if (watch_id >= 0) {
+ if (!s->s_watchers) {
+ wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+ if (!wlist)
+ goto err_wqueue;
+ INIT_HLIST_HEAD(&wlist->watchers);
+ spin_lock_init(&wlist->lock);
+ wlist->release_watch = release_sb_watch;
+ }
+
+ watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+ if (!watch)
+ goto err_wlist;
+
+ init_watch(watch);
+ watch->id = s->s_watch_id;
+ watch->queue = wqueue;
+ watch->private = s;
+ watch->info_id = (u32)watch_id << 24;
+
+ down_write(&s->s_umount);
+ ret = -EIO;
+ if (atomic_read(&s->s_active)) {
+ if (!s->s_watchers) {
+ s->s_watchers = wlist;
+ wlist = NULL;
+ }
+
+ watch->watch_list = s->s_watchers;
+ ret = add_watch_to_object(watch);
+ if (ret == 0) {
+ spin_lock(&sb_lock);
+ s->s_count++;
+ spin_unlock(&sb_lock);
+ }
+ }
+ up_write(&s->s_umount);
+ if (ret < 0)
+ kfree(watch);
+ } else if (s->s_watchers) {
+ down_write(&s->s_umount);
+ ret = remove_watch_from_object(s->s_watchers, wqueue,
+ s->s_watch_id, false);
+ up_write(&s->s_umount);
+ } else {
+ ret = -EBADSLT;
+ }
+
+err_wlist:
+ kfree(wlist);
+err_wqueue:
+ put_watch_queue(wqueue);
+err_path:
+ path_put(&path);
+ return ret;
+}
+#endif
@@ -37,6 +37,7 @@
#include <linux/uuid.h>
#include <linux/errseq.h>
#include <linux/ioprio.h>
+#include <linux/watch_queue.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1463,6 +1464,12 @@ struct super_block {
spinlock_t s_inode_wblist_lock;
struct list_head s_inodes_wb; /* writeback inodes */
+
+ /* Superblock event notifications */
+#ifdef CONFIG_SB_NOTIFICATIONS
+ struct watch_list *s_watchers;
+ u64 s_watch_id;
+#endif
} __randomize_layout;
/* Helper functions so that in most cases filesystems will
@@ -3458,4 +3465,74 @@ static inline bool dir_relax_shared(struct inode *inode)
extern bool path_noexec(const struct path *path);
extern void inode_nohighmem(struct inode *inode);
+extern void post_sb_notification(struct super_block *, struct superblock_notification *);
+
+/**
+ * notify_sb: Post simple superblock notification.
+ * @s: The superblock the notification is about.
+ * @subtype: The type of notification.
+ */
+static inline void notify_sb(struct super_block *s,
+ enum superblock_notification_type subtype,
+ u32 info)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_notification n = {
+ .watch.type = WATCH_TYPE_SB_NOTIFY,
+ .watch.subtype = subtype,
+ .watch.info = sizeof(n) | info,
+ .sb_id = s->s_watch_id,
+ };
+
+ post_sb_notification(s, &n);
+ }
+
+#endif
+}
+
+/**
+ * sb_error: Post superblock error notification.
+ * @s: The superblock the notification is about.
+ */
+static inline int sb_error(struct super_block *s, int error)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_error_notification n = {
+ .s.watch.type = WATCH_TYPE_SB_NOTIFY,
+ .s.watch.subtype = notify_superblock_error,
+ .s.watch.info = sizeof(n),
+ .s.sb_id = s->s_watch_id,
+ .error_number = error,
+ .error_cookie = 0,
+ };
+
+ post_sb_notification(s, &n.s);
+ }
+#endif
+ return error;
+}
+
+/**
+ * sb_EDQUOT: Post superblock quota overrun notification.
+ * @s: The superblock the notification is about.
+ */
+static inline int sb_EQDUOT(struct super_block *s)
+{
+#ifdef CONFIG_SB_NOTIFICATIONS
+ if (unlikely(s->s_watchers)) {
+ struct superblock_notification n = {
+ .watch.type = WATCH_TYPE_SB_NOTIFY,
+ .watch.subtype = notify_superblock_edquot,
+ .watch.info = sizeof(n),
+ .sb_id = s->s_watch_id,
+ };
+
+ post_sb_notification(s, &n);
+ }
+#endif
+ return -EDQUOT;
+}
+
#endif /* _LINUX_FS_H */
@@ -915,6 +915,8 @@ asmlinkage long sys_fsinfo(int dfd, const char __user *path,
void __user *buffer, size_t buf_size);
asmlinkage long sys_mount_notify(int dfd, const char __user *path,
unsigned int at_flags, int watch_fd, int watch_id);
+asmlinkage long sys_sb_notify(int dfd, const char __user *path,
+ unsigned int at_flags, int watch_fd, int watch_id);
/*
* Architecture-specific system calls
@@ -127,4 +127,30 @@ struct mount_notification {
__u32 changed_mount; /* The mount that got changed */
};
+/*
+ * Type of superblock notification.
+ */
+enum superblock_notification_type {
+ notify_superblock_readonly = 0, /* Filesystem toggled between R/O and R/W */
+ notify_superblock_error = 1, /* Error in filesystem or blockdev */
+ notify_superblock_edquot = 2, /* EDQUOT notification */
+ notify_superblock_network = 3, /* Network status change */
+};
+
+/*
+ * Superblock notification record.
+ * - watch.type = WATCH_TYPE_MOUNT_NOTIFY
+ * - watch.subtype = enum superblock_notification_subtype
+ */
+struct superblock_notification {
+ struct watch_notification watch; /* WATCH_TYPE_SB_NOTIFY */
+ __u64 sb_id; /* 64-bit superblock ID [fsinfo_ids::f_sb_id] */
+};
+
+struct superblock_error_notification {
+ struct superblock_notification s; /* subtype = notify_superblock_error */
+ __u32 error_number;
+ __u32 error_cookie;
+};
+
#endif /* _UAPI_LINUX_WATCH_QUEUE_H */
@@ -100,6 +100,9 @@ COND_SYSCALL(quotactl);
/* fs/read_write.c */
+/* fs/sb_notify.c */
+COND_SYSCALL(sb_notify);
+
/* fs/sendfile.c */
/* fs/select.c */
Add a superblock event notification facility whereby notifications about superblock events, such as I/O errors (EIO), quota limits being hit (EDQUOT) and running out of space (ENOSPC) can be reported to a monitoring process asynchronously. Note that this does not cover vfsmount topology changes. mount_notify() is used for that. Firstly, an event queue needs to be created: fd = open("/dev/event_queue", O_RDWR); then a notification can be set up to report notifications via that queue: struct watch_notification_filter filter; memset(&filter, 0, sizeof(filter)); filter.subtype_filter[0] = ~0ULL; filter.info_id = 0x03000000; sb_notify(AT_FDCWD, "/home/dhowells", 0, fd, &filter); In this case, it would let me monitor my own homedir for events. Note that the queue can be shared between multiple notifications of various types. [*] QUESTION: Does this want to be per-sb, per-mount_namespace, per-some-new-notify-ns or per-system? Or do multiple options make sense? [*] QUESTION: I've done it this way so that anyone could theoretically monitor the superblock of any filesystem they can pathwalk to, but do we need other security controls? [*] QUESTION: Should the LSM be able to filter the events a queue can receive? For instance the opener of the queue would grant that queue subject creds (by ->f_cred) that could be used to govern what events could be seen, assuming the target superblock to have some object creds, based on, say, the mounter. Signed-off-by: David Howells <dhowells@redhat.com> --- arch/x86/entry/syscalls/syscall_32.tbl | 1 arch/x86/entry/syscalls/syscall_64.tbl | 1 fs/Kconfig | 12 +++ fs/super.c | 116 ++++++++++++++++++++++++++++++++ include/linux/fs.h | 77 +++++++++++++++++++++ include/linux/syscalls.h | 2 + include/uapi/linux/watch_queue.h | 26 +++++++ kernel/sys_ni.c | 3 + 8 files changed, 238 insertions(+) -- To unsubscribe from this list: send the line "unsubscribe linux-security-module" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html