diff mbox series

[3/5] fsnotify: allow adding an inode mark without pinning inode

Message ID 20220307155741.1352405-4-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series Volatile fanotify marks | expand

Commit Message

Amir Goldstein March 7, 2022, 3:57 p.m. UTC
fsnotify_add_mark() and variants implicitly take a reference on inode
when attaching a mark to an inode.

Make that behavior opt-out with the flag FSNOTIFY_ADD_MARK_NO_IREF.

Instead of taking the inode reference when attaching connector to inode
and dropping the inode reference when detaching connector from inode,
take the inode reference on attach of the first mark that wants to hold
an inode reference and drop the inode reference on detach of the last
mark that wants to hold an inode reference.

This leaves the choice to the backend whether or not to pin the inode
when adding an inode mark.

This is intended to be used when adding a mark with ignored mask that is
used for optimization in cases where group can afford getting unneeded
events and reinstate the mark with ignored mask when inode is accessed
again after being evicted.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/mark.c                 | 70 ++++++++++++++++++++++++++++----
 include/linux/fsnotify_backend.h |  3 ++
 2 files changed, 65 insertions(+), 8 deletions(-)

Comments

Jan Kara March 17, 2022, 3:27 p.m. UTC | #1
On Mon 07-03-22 17:57:39, Amir Goldstein wrote:
> fsnotify_add_mark() and variants implicitly take a reference on inode
> when attaching a mark to an inode.
> 
> Make that behavior opt-out with the flag FSNOTIFY_ADD_MARK_NO_IREF.
> 
> Instead of taking the inode reference when attaching connector to inode
> and dropping the inode reference when detaching connector from inode,
> take the inode reference on attach of the first mark that wants to hold
> an inode reference and drop the inode reference on detach of the last
> mark that wants to hold an inode reference.
> 
> This leaves the choice to the backend whether or not to pin the inode
> when adding an inode mark.
> 
> This is intended to be used when adding a mark with ignored mask that is
> used for optimization in cases where group can afford getting unneeded
> events and reinstate the mark with ignored mask when inode is accessed
> again after being evicted.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>

Couple of notes below.

> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 190df435919f..f71b6814bfa7 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -213,6 +213,17 @@ static void *fsnotify_detach_connector_from_object(
>  	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
>  		inode = fsnotify_conn_inode(conn);
>  		inode->i_fsnotify_mask = 0;
> +
> +		pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
> +			 __func__, inode, atomic_read(&conn->proxy_iref),
> +			 atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> +			 atomic_read(&inode->i_count));

Are these pr_debug() prints that useful? My experience is they get rarely used
after the code is debugged... If you think some places are useful longer
term, tracepoints are probably easier to use these days?

> +
> +		/* Unpin inode when detaching from connector */
> +		if (atomic_read(&conn->proxy_iref))
> +			atomic_set(&conn->proxy_iref, 0);
> +		else
> +			inode = NULL;

proxy_iref is always manipulated under conn->lock so there's no need for
atomic operations here.

>  	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
>  		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
>  	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
> @@ -240,12 +251,43 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
>  /* Drop object reference originally held by a connector */
>  static void fsnotify_drop_object(unsigned int type, void *objp)
>  {
> +	struct inode *inode = objp;
> +
>  	if (!objp)
>  		return;
>  	/* Currently only inode references are passed to be dropped */
>  	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
>  		return;
> -	fsnotify_put_inode_ref(objp);
> +
> +	pr_debug("%s: inode=%p sb_connectors=%lu, icount=%u\n", __func__,
> +		 inode, atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> +		 atomic_read(&inode->i_count));
> +
> +	fsnotify_put_inode_ref(inode);
> +}
> +
> +/* Drop the proxy refcount on inode maintainted by connector */
> +static struct inode *fsnotify_drop_iref(struct fsnotify_mark_connector *conn,
> +					unsigned int *type)
> +{
> +	struct inode *inode = fsnotify_conn_inode(conn);
> +
> +	if (WARN_ON_ONCE(!inode || conn->type != FSNOTIFY_OBJ_TYPE_INODE))
> +		return NULL;
> +
> +	pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
> +		 __func__, inode, atomic_read(&conn->proxy_iref),
> +		 atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> +		 atomic_read(&inode->i_count));
> +
> +	if (WARN_ON_ONCE(!atomic_read(&conn->proxy_iref)) ||
> +	    !atomic_dec_and_test(&conn->proxy_iref))
> +		return NULL;
> +
> +	fsnotify_put_inode_ref(inode);

You cannot call fsnotify_put_inode_ref() here because the function is
called under conn->lock and iput() can sleep... You need to play similar
game with passing inode pointer like
fsnotify_detach_connector_from_object() does.

> +	*type = FSNOTIFY_OBJ_TYPE_INODE;
> +
> +	return inode;
>  }
>  
>  void fsnotify_put_mark(struct fsnotify_mark *mark)
> @@ -275,6 +317,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
>  		free_conn = true;
>  	} else {
>  		__fsnotify_recalc_mask(conn);
> +		/* Unpin inode on last mark that wants inode refcount held */
> +		if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
> +			objp = fsnotify_drop_iref(conn, &type);
>  	}

This is going to be interesting. What if the connector got detached from
the inode before fsnotify_put_mark() was called? Then iref_proxy would be
already 0 and we would barf? I think
fsnotify_detach_connector_from_object() needs to drop inode reference but
leave iref_proxy alone for this to work. fsnotify_drop_iref() would then
drop inode reference only if iref_proxy reaches 0 and conn->objp != NULL...


>  	WRITE_ONCE(mark->connector, NULL);
>  	spin_unlock(&conn->lock);

								Honza
Amir Goldstein March 18, 2022, 6:23 a.m. UTC | #2
>

On Thu, Mar 17, 2022 at 5:27 PM Jan Kara <jack@suse.cz> wrote:
>
> On Mon 07-03-22 17:57:39, Amir Goldstein wrote:
> > fsnotify_add_mark() and variants implicitly take a reference on inode
> > when attaching a mark to an inode.
> >
> > Make that behavior opt-out with the flag FSNOTIFY_ADD_MARK_NO_IREF.
> >
> > Instead of taking the inode reference when attaching connector to inode
> > and dropping the inode reference when detaching connector from inode,
> > take the inode reference on attach of the first mark that wants to hold
> > an inode reference and drop the inode reference on detach of the last
> > mark that wants to hold an inode reference.
> >
> > This leaves the choice to the backend whether or not to pin the inode
> > when adding an inode mark.
> >
> > This is intended to be used when adding a mark with ignored mask that is
> > used for optimization in cases where group can afford getting unneeded
> > events and reinstate the mark with ignored mask when inode is accessed
> > again after being evicted.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>
> Couple of notes below.
>
> > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > index 190df435919f..f71b6814bfa7 100644
> > --- a/fs/notify/mark.c
> > +++ b/fs/notify/mark.c
> > @@ -213,6 +213,17 @@ static void *fsnotify_detach_connector_from_object(
> >       if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
> >               inode = fsnotify_conn_inode(conn);
> >               inode->i_fsnotify_mask = 0;
> > +
> > +             pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
> > +                      __func__, inode, atomic_read(&conn->proxy_iref),
> > +                      atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> > +                      atomic_read(&inode->i_count));
>
> Are these pr_debug() prints that useful? My experience is they get rarely used
> after the code is debugged... If you think some places are useful longer
> term, tracepoints are probably easier to use these days?
>

Well in this case, the debug prints didn't even help me find the refcount bug
I had in the code, so not useful.

> > +
> > +             /* Unpin inode when detaching from connector */
> > +             if (atomic_read(&conn->proxy_iref))
> > +                     atomic_set(&conn->proxy_iref, 0);
> > +             else
> > +                     inode = NULL;
>
> proxy_iref is always manipulated under conn->lock so there's no need for
> atomic operations here.

Of course. Much simpler!

>
> >       } else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
> >               fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
> >       } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
> > @@ -240,12 +251,43 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
> >  /* Drop object reference originally held by a connector */
> >  static void fsnotify_drop_object(unsigned int type, void *objp)
> >  {
> > +     struct inode *inode = objp;
> > +
> >       if (!objp)
> >               return;
> >       /* Currently only inode references are passed to be dropped */
> >       if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
> >               return;
> > -     fsnotify_put_inode_ref(objp);
> > +
> > +     pr_debug("%s: inode=%p sb_connectors=%lu, icount=%u\n", __func__,
> > +              inode, atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> > +              atomic_read(&inode->i_count));
> > +
> > +     fsnotify_put_inode_ref(inode);
> > +}
> > +
> > +/* Drop the proxy refcount on inode maintainted by connector */
> > +static struct inode *fsnotify_drop_iref(struct fsnotify_mark_connector *conn,
> > +                                     unsigned int *type)
> > +{
> > +     struct inode *inode = fsnotify_conn_inode(conn);
> > +
> > +     if (WARN_ON_ONCE(!inode || conn->type != FSNOTIFY_OBJ_TYPE_INODE))
> > +             return NULL;
> > +
> > +     pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
> > +              __func__, inode, atomic_read(&conn->proxy_iref),
> > +              atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
> > +              atomic_read(&inode->i_count));
> > +
> > +     if (WARN_ON_ONCE(!atomic_read(&conn->proxy_iref)) ||
> > +         !atomic_dec_and_test(&conn->proxy_iref))
> > +             return NULL;
> > +
> > +     fsnotify_put_inode_ref(inode);
>
> You cannot call fsnotify_put_inode_ref() here because the function is
> called under conn->lock and iput() can sleep... You need to play similar
> game with passing inode pointer like
> fsnotify_detach_connector_from_object() does.

That was a plain bug.
That game is already being played and fsnotify_drop_object()
is responsible of iput().


BTW, I realized that incrementing/decrementing s_fsnotify_connectors
along with ihold/iput is completely useless, so I will remove the
fsnotify_{put,get}_inode_ref() helpers.

> > +     *type = FSNOTIFY_OBJ_TYPE_INODE;
> > +
> > +     return inode;
> >  }
> >
> >  void fsnotify_put_mark(struct fsnotify_mark *mark)
> > @@ -275,6 +317,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
> >               free_conn = true;
> >       } else {
> >               __fsnotify_recalc_mask(conn);
> > +             /* Unpin inode on last mark that wants inode refcount held */
> > +             if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
> > +                     objp = fsnotify_drop_iref(conn, &type);
> >       }
>
> This is going to be interesting. What if the connector got detached from
> the inode before fsnotify_put_mark() was called? Then iref_proxy would be
> already 0 and we would barf? I think
> fsnotify_detach_connector_from_object() needs to drop inode reference but
> leave iref_proxy alone for this to work. fsnotify_drop_iref() would then
> drop inode reference only if iref_proxy reaches 0 and conn->objp != NULL...
>

Good catch! but solution I think the is way simpler:

+               /* Unpin inode on last mark that wants inode refcount held */
+               if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
+                   mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
+                       objp = fsnotify_drop_iref(conn, &type);

(iref_proxy > 0) always infers a single i_count reference, so it makes
fsnotify_detach_connector_from_object() sets iref_proxy = 0 and
conn->type = FSNOTIFY_OBJ_TYPE_DETACHED, so we should be good here.

Thanks,
Amir.
Amir Goldstein March 20, 2022, 1:06 p.m. UTC | #3
> > >  void fsnotify_put_mark(struct fsnotify_mark *mark)
> > > @@ -275,6 +317,9 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
> > >               free_conn = true;
> > >       } else {
> > >               __fsnotify_recalc_mask(conn);
> > > +             /* Unpin inode on last mark that wants inode refcount held */
> > > +             if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
> > > +                     objp = fsnotify_drop_iref(conn, &type);
> > >       }
> >
> > This is going to be interesting. What if the connector got detached from
> > the inode before fsnotify_put_mark() was called? Then iref_proxy would be
> > already 0 and we would barf? I think
> > fsnotify_detach_connector_from_object() needs to drop inode reference but
> > leave iref_proxy alone for this to work. fsnotify_drop_iref() would then
> > drop inode reference only if iref_proxy reaches 0 and conn->objp != NULL...
> >
>
> Good catch! but solution I think the is way simpler:
>
> +               /* Unpin inode on last mark that wants inode refcount held */
> +               if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
> +                   mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
> +                       objp = fsnotify_drop_iref(conn, &type);
>
> (iref_proxy > 0) always infers a single i_count reference, so it makes
> fsnotify_detach_connector_from_object() sets iref_proxy = 0 and
> conn->type = FSNOTIFY_OBJ_TYPE_DETACHED, so we should be good here.
>

FWIW, I completely changed the proxy iref tracking in v2 (branch fan_evictable).
There is no iref_proxy, only FSNOTIFY_CONN_FLAG_HAS_IREF flag on the
connector which is aligned with elevated inode reference.

The "virtual iref_proxy" is recalculated in __fsnotify_recalc_mask()
which allows
for the "upgrade to pinned inode" logic, but iput() is only called on
detach of mark
or detach of object, if connector had FSNOTIFY_CONN_FLAG_HAS_IREF and
the  "virtual iref_proxy" dropped to 0.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 190df435919f..f71b6814bfa7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -213,6 +213,17 @@  static void *fsnotify_detach_connector_from_object(
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
 		inode = fsnotify_conn_inode(conn);
 		inode->i_fsnotify_mask = 0;
+
+		pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
+			 __func__, inode, atomic_read(&conn->proxy_iref),
+			 atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
+			 atomic_read(&inode->i_count));
+
+		/* Unpin inode when detaching from connector */
+		if (atomic_read(&conn->proxy_iref))
+			atomic_set(&conn->proxy_iref, 0);
+		else
+			inode = NULL;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
@@ -240,12 +251,43 @@  static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
 /* Drop object reference originally held by a connector */
 static void fsnotify_drop_object(unsigned int type, void *objp)
 {
+	struct inode *inode = objp;
+
 	if (!objp)
 		return;
 	/* Currently only inode references are passed to be dropped */
 	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
 		return;
-	fsnotify_put_inode_ref(objp);
+
+	pr_debug("%s: inode=%p sb_connectors=%lu, icount=%u\n", __func__,
+		 inode, atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
+		 atomic_read(&inode->i_count));
+
+	fsnotify_put_inode_ref(inode);
+}
+
+/* Drop the proxy refcount on inode maintainted by connector */
+static struct inode *fsnotify_drop_iref(struct fsnotify_mark_connector *conn,
+					unsigned int *type)
+{
+	struct inode *inode = fsnotify_conn_inode(conn);
+
+	if (WARN_ON_ONCE(!inode || conn->type != FSNOTIFY_OBJ_TYPE_INODE))
+		return NULL;
+
+	pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
+		 __func__, inode, atomic_read(&conn->proxy_iref),
+		 atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
+		 atomic_read(&inode->i_count));
+
+	if (WARN_ON_ONCE(!atomic_read(&conn->proxy_iref)) ||
+	    !atomic_dec_and_test(&conn->proxy_iref))
+		return NULL;
+
+	fsnotify_put_inode_ref(inode);
+	*type = FSNOTIFY_OBJ_TYPE_INODE;
+
+	return inode;
 }
 
 void fsnotify_put_mark(struct fsnotify_mark *mark)
@@ -275,6 +317,9 @@  void fsnotify_put_mark(struct fsnotify_mark *mark)
 		free_conn = true;
 	} else {
 		__fsnotify_recalc_mask(conn);
+		/* Unpin inode on last mark that wants inode refcount held */
+		if (mark->flags & FSNOTIFY_MARK_FLAG_HAS_IREF)
+			objp = fsnotify_drop_iref(conn, &type);
 	}
 	WRITE_ONCE(mark->connector, NULL);
 	spin_unlock(&conn->lock);
@@ -499,7 +544,6 @@  static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 					       unsigned int obj_type,
 					       __kernel_fsid_t *fsid)
 {
-	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
 
 	conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
@@ -507,6 +551,7 @@  static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		return -ENOMEM;
 	spin_lock_init(&conn->lock);
 	INIT_HLIST_HEAD(&conn->list);
+	atomic_set(&conn->proxy_iref, 0);
 	conn->type = obj_type;
 	conn->obj = connp;
 	/* Cache fsid of filesystem containing the object */
@@ -517,10 +562,6 @@  static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 		conn->fsid.val[0] = conn->fsid.val[1] = 0;
 		conn->flags = 0;
 	}
-	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
-		inode = fsnotify_conn_inode(conn);
-		fsnotify_get_inode_ref(inode);
-	}
 	fsnotify_get_sb_connectors(conn);
 
 	/*
@@ -529,8 +570,6 @@  static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 	 */
 	if (cmpxchg(connp, NULL, conn)) {
 		/* Someone else created list structure for us */
-		if (inode)
-			fsnotify_put_inode_ref(inode);
 		fsnotify_put_sb_connectors(conn);
 		kmem_cache_free(fsnotify_mark_connector_cachep, conn);
 	}
@@ -649,6 +688,21 @@  static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 	/* mark should be the last entry.  last is the current last entry */
 	hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
 added:
+	/* Pin inode on first mark that wants inode refcount held */
+	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE &&
+	    !(flags & FSNOTIFY_ADD_MARK_NO_IREF)) {
+		struct inode *inode = fsnotify_conn_inode(conn);
+
+		mark->flags |= FSNOTIFY_MARK_FLAG_HAS_IREF;
+		if (atomic_inc_return(&conn->proxy_iref) == 1)
+			fsnotify_get_inode_ref(inode);
+
+		pr_debug("%s: inode=%p iref=%u sb_connectors=%lu icount=%u\n",
+			 __func__, inode, atomic_read(&conn->proxy_iref),
+			 atomic_long_read(&inode->i_sb->s_fsnotify_connectors),
+			 atomic_read(&inode->i_count));
+	}
+
 	/*
 	 * Since connector is attached to object using cmpxchg() we are
 	 * guaranteed that connector initialization is fully visible by anyone
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 0b548518b166..de718864c5f5 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -425,6 +425,7 @@  struct fsnotify_mark_connector {
 	unsigned short type;	/* Type of object [lock] */
 #define FSNOTIFY_CONN_FLAG_HAS_FSID	0x01
 	unsigned short flags;	/* flags [lock] */
+	atomic_t proxy_iref;	/* marks that want inode reference held */
 	__kernel_fsid_t fsid;	/* fsid of filesystem containing object */
 	union {
 		/* Object pointer [lock] */
@@ -471,6 +472,7 @@  struct fsnotify_mark {
 	/* Events types to ignore [mark->lock, group->mark_mutex] */
 	__u32 ignored_mask;
 	/* Internal fsnotify flags */
+#define FSNOTIFY_MARK_FLAG_HAS_IREF		0x01
 #define FSNOTIFY_MARK_FLAG_ALIVE		0x02
 #define FSNOTIFY_MARK_FLAG_ATTACHED		0x04
 	/* Backend controlled flags */
@@ -636,6 +638,7 @@  extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
 
 /* attach the mark to the object */
 #define FSNOTIFY_ADD_MARK_ALLOW_DUPS	0x1
+#define FSNOTIFY_ADD_MARK_NO_IREF	0x2
 
 extern int fsnotify_add_mark(struct fsnotify_mark *mark,
 			     fsnotify_connp_t *connp, unsigned int obj_type,