@@ -234,7 +234,6 @@ void __destroy_inode(struct inode *inode)
inode_detach_wb(inode);
security_inode_free(inode);
fsnotify_inode_delete(inode);
- fsnotify_connector_free(&inode->i_fsnotify_marks);
locks_free_lock_context(inode);
if (!inode->i_nlink) {
WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
@@ -59,7 +59,7 @@ struct mount {
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
#ifdef CONFIG_FSNOTIFY
- struct fsnotify_mark_connector *mnt_fsnotify_marks;
+ struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
#endif
int mnt_id; /* mount identifier */
@@ -1108,7 +1108,6 @@ static void cleanup_mnt(struct mount *mnt)
if (unlikely(mnt->mnt_pins.first))
mnt_pin_kill(mnt);
fsnotify_vfsmount_delete(&mnt->mnt);
- fsnotify_connector_free(&mnt->mnt_fsnotify_marks);
dput(mnt->mnt.mnt_root);
deactivate_super(mnt->mnt.mnt_sb);
mnt_free_id(mnt);
@@ -228,7 +228,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
if ((mask & FS_MODIFY) ||
(test_mask & to_tell->i_fsnotify_mask)) {
- inode_conn = lockless_dereference(to_tell->i_fsnotify_marks);
+ inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
+ &fsnotify_mark_srcu);
if (inode_conn)
inode_node = srcu_dereference(inode_conn->list.first,
&fsnotify_mark_srcu);
@@ -236,11 +237,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
if (mnt && ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))) {
- inode_conn = lockless_dereference(to_tell->i_fsnotify_marks);
+ inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
+ &fsnotify_mark_srcu);
if (inode_conn)
inode_node = srcu_dereference(inode_conn->list.first,
&fsnotify_mark_srcu);
- vfsmount_conn = lockless_dereference(mnt->mnt_fsnotify_marks);
+ vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
+ &fsnotify_mark_srcu);
if (vfsmount_conn)
vfsmount_node = srcu_dereference(
vfsmount_conn->list.first,
@@ -20,19 +20,19 @@ extern int fsnotify_compare_groups(struct fsnotify_group *a,
/* Find mark belonging to given group in the list of marks */
extern struct fsnotify_mark *fsnotify_find_mark(
- struct fsnotify_mark_connector *conn,
+ struct fsnotify_mark_connector **connp,
struct fsnotify_group *group);
/* Destroy all marks connected via given connector */
-extern void fsnotify_destroy_marks(struct fsnotify_mark_connector *conn);
+extern void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp);
/* run the list of all marks associated with inode and destroy them */
static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
{
- fsnotify_destroy_marks(inode->i_fsnotify_marks);
+ fsnotify_destroy_marks(&inode->i_fsnotify_marks);
}
/* run the list of all marks associated with vfsmount and destroy them */
static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
- fsnotify_destroy_marks(real_mount(mnt)->mnt_fsnotify_marks);
+ fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
}
/* prepare for freeing all marks associated with given group */
extern void fsnotify_detach_group_marks(struct fsnotify_group *group);
@@ -50,7 +50,7 @@ void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
struct inode *inode)
{
- return fsnotify_find_mark(inode->i_fsnotify_marks, group);
+ return fsnotify_find_mark(&inode->i_fsnotify_marks, group);
}
/**
@@ -89,10 +89,14 @@ struct kmem_cache *fsnotify_mark_connector_cachep;
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
+static struct fsnotify_mark_connector *connector_destroy_list;
static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);
+static void fsnotify_connector_destroy_workfn(struct work_struct *work);
+static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);
+
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
atomic_inc(&mark->refcnt);
@@ -146,23 +150,63 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
}
}
+/* Free all connectors queued for freeing once SRCU period ends */
+static void fsnotify_connector_destroy_workfn(struct work_struct *work)
+{
+ struct fsnotify_mark_connector *conn, *free;
+
+ spin_lock(&destroy_lock);
+ conn = connector_destroy_list;
+ connector_destroy_list = NULL;
+ spin_unlock(&destroy_lock);
+
+ synchronize_srcu(&fsnotify_mark_srcu);
+ while (conn) {
+ free = conn;
+ conn = conn->destroy_next;
+ kmem_cache_free(fsnotify_mark_connector_cachep, free);
+ }
+}
+
static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark)
{
struct fsnotify_mark_connector *conn;
struct inode *inode = NULL;
+ bool free_conn = false;
conn = mark->connector;
spin_lock(&conn->lock);
hlist_del_init_rcu(&mark->obj_list);
if (hlist_empty(&conn->list)) {
- if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
+ if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
inode = conn->inode;
+ rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
+ inode->i_fsnotify_mask = 0;
+ conn->inode = NULL;
+ conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
+ } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ rcu_assign_pointer(
+ real_mount(conn->mnt)->mnt_fsnotify_marks,
+ NULL);
+ real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
+ conn->mnt = NULL;
+ conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+ }
+ free_conn = true;
} else {
__fsnotify_recalc_mask(conn);
}
mark->connector = NULL;
spin_unlock(&conn->lock);
+ if (free_conn) {
+ spin_lock(&destroy_lock);
+ conn->destroy_next = connector_destroy_list;
+ connector_destroy_list = conn;
+ spin_unlock(&destroy_lock);
+ queue_work(system_unbound_wq, &connector_reaper_work);
+ }
+
return inode;
}
@@ -267,14 +311,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
fsnotify_free_mark(mark);
}
-void fsnotify_connector_free(struct fsnotify_mark_connector **connp)
-{
- if (*connp) {
- kmem_cache_free(fsnotify_mark_connector_cachep, *connp);
- *connp = NULL;
- }
-}
-
void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
{
assert_spin_locked(&mark->lock);
@@ -326,6 +362,34 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
}
/*
+ * Get mark connector, make sure it is alive and return with its lock held.
+ * This is for users that get connector pointer from inode or mount. Users that
+ * hold reference to a mark on the list may directly lock connector->lock as
+ * they are sure list cannot go away under them.
+ */
+static struct fsnotify_mark_connector *fsnotify_grab_connector(
+ struct fsnotify_mark_connector * __rcu *connp)
+{
+ struct fsnotify_mark_connector *conn;
+ int idx;
+
+ idx = srcu_read_lock(&fsnotify_mark_srcu);
+ conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
+ if (!conn)
+ goto out;
+ spin_lock(&conn->lock);
+ if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE |
+ FSNOTIFY_OBJ_TYPE_VFSMOUNT))) {
+ spin_unlock(&conn->lock);
+ srcu_read_unlock(&fsnotify_mark_srcu, idx);
+ return NULL;
+ }
+out:
+ srcu_read_unlock(&fsnotify_mark_srcu, idx);
+ return conn;
+}
+
+/*
* Add mark into proper place in given list of marks. These marks may be used
* for the fsnotify backend to determine which event types should be delivered
* to which group and for which inodes. These marks are ordered according to
@@ -347,8 +411,11 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
connp = &inode->i_fsnotify_marks;
else
connp = &real_mount(mnt)->mnt_fsnotify_marks;
-
- if (!*connp) {
+restart:
+ spin_lock(&mark->lock);
+ conn = fsnotify_grab_connector(connp);
+ if (!conn) {
+ spin_unlock(&mark->lock);
conn = kmem_cache_alloc(fsnotify_mark_connector_cachep,
GFP_KERNEL);
if (!conn)
@@ -357,7 +424,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
INIT_HLIST_HEAD(&conn->list);
if (inode) {
conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
- conn->inode = inode;
+ conn->inode = igrab(inode);
} else {
conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
conn->mnt = mnt;
@@ -368,18 +435,16 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
*/
if (cmpxchg(connp, NULL, conn)) {
/* Someone else created list structure for us */
+ if (inode)
+ iput(inode);
kmem_cache_free(fsnotify_mark_connector_cachep, conn);
}
+ goto restart;
}
- conn = *connp;
- spin_lock(&mark->lock);
- spin_lock(&conn->lock);
/* is mark the first mark? */
if (hlist_empty(&conn->list)) {
hlist_add_head_rcu(&mark->obj_list, &conn->list);
- if (inode)
- igrab(inode);
goto added;
}
@@ -481,15 +546,16 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
* Given a list of marks, find the mark associated with given group. If found
* take a reference to that mark and return it, else return NULL.
*/
-struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector *conn,
+struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector **connp,
struct fsnotify_group *group)
{
+ struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark;
+ conn = fsnotify_grab_connector(connp);
if (!conn)
return NULL;
- spin_lock(&conn->lock);
hlist_for_each_entry(mark, &conn->list, obj_list) {
if (mark->group == group) {
fsnotify_get_mark(mark);
@@ -567,26 +633,20 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
}
}
-void fsnotify_destroy_marks(struct fsnotify_mark_connector *conn)
+/* Destroy all marks attached to inode / vfsmount */
+void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp)
{
+ struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark;
- if (!conn)
- return;
-
- while (1) {
+ while ((conn = fsnotify_grab_connector(connp))) {
/*
* We have to be careful since we can race with e.g.
- * fsnotify_clear_marks_by_group() and once we drop 'lock',
- * mark can get removed from the obj_list and destroyed. But
- * we are holding mark reference so mark cannot be freed and
- * calling fsnotify_destroy_mark() more than once is fine.
+ * fsnotify_clear_marks_by_group() and once we drop the list
+ * lock, mark can get removed from the obj_list and destroyed.
+ * But we are holding mark reference so mark cannot be freed
+ * and calling fsnotify_destroy_mark() more than once is fine.
*/
- spin_lock(&conn->lock);
- if (hlist_empty(&conn->list)) {
- spin_unlock(&conn->lock);
- break;
- }
mark = hlist_entry(conn->list.first, struct fsnotify_mark,
obj_list);
fsnotify_get_mark(mark);
@@ -48,5 +48,5 @@ struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
{
struct mount *m = real_mount(mnt);
- return fsnotify_find_mark(m->mnt_fsnotify_marks, group);
+ return fsnotify_find_mark(&m->mnt_fsnotify_marks, group);
}
@@ -647,7 +647,7 @@ struct inode {
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
- struct fsnotify_mark_connector *i_fsnotify_marks;
+ struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
#endif
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
@@ -197,8 +197,8 @@ struct fsnotify_group {
/*
* Inode / vfsmount point to this structure which tracks all marks attached to
* the inode / vfsmount. The reference to inode / vfsmount is held by this
- * structure whenever the list is non-empty. The structure is freed only when
- * inode / vfsmount gets freed.
+ * structure. We destroy this structure when there are no more marks attached
+ * to it. The structure is protected by fsnotify_mark_srcu.
*/
struct fsnotify_mark_connector {
spinlock_t lock;
@@ -209,7 +209,11 @@ struct fsnotify_mark_connector {
struct inode *inode;
struct vfsmount *mnt;
};
- struct hlist_head list;
+ union {
+ struct hlist_head list;
+ /* Used listing heads to free after srcu period expires */
+ struct fsnotify_mark_connector *destroy_next;
+ };
};
/*
@@ -361,7 +365,6 @@ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group);
/* run all the marks in a group, and clear all of the marks attached to given object type */
extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags);
-extern void fsnotify_connector_free(struct fsnotify_mark_connector **connp);
extern void fsnotify_get_mark(struct fsnotify_mark *mark);
extern void fsnotify_put_mark(struct fsnotify_mark *mark);
extern void fsnotify_unmount_inodes(struct super_block *sb);
@@ -1597,8 +1597,7 @@ static inline void handle_one(const struct inode *inode)
struct audit_tree_refs *p;
struct audit_chunk *chunk;
int count;
- if (likely(!inode->i_fsnotify_marks ||
- hlist_empty(&inode->i_fsnotify_marks->list)))
+ if (likely(!inode->i_fsnotify_marks))
return;
context = current->audit_context;
p = context->trees;
@@ -1641,8 +1640,7 @@ static void handle_path(const struct dentry *dentry)
seq = read_seqbegin(&rename_lock);
for(;;) {
struct inode *inode = d_backing_inode(d);
- if (inode && unlikely(inode->i_fsnotify_marks &&
- !hlist_empty(&inode->i_fsnotify_marks->list))) {
+ if (inode && unlikely(inode->i_fsnotify_marks)) {
struct audit_chunk *chunk;
chunk = audit_tree_lookup(inode);
if (chunk) {