diff mbox series

[RFC,v2,09/19] fuse: {uring} Add a dev_release exception for fuse-over-io-uring

Message ID 20240529-fuse-uring-for-6-9-rfc2-out-v1-9-d149476b1d65@ddn.com (mailing list archive)
State New
Headers show
Series fuse: fuse-over-io-uring | expand

Commit Message

Bernd Schubert May 29, 2024, 6 p.m. UTC
fuse-over-io-uring needs an implicit device clone, which is done per
queue to avoid a hanging "umount" when the daemon side is already terminated.
The reason is that fuse_dev_release() is not called when there are queued
(waiting) io_uring commands.
The solution is the implicit device clone and an exception in
fuse_dev_release() for uring devices to abort the connection when only uring
devices are left.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c         | 32 ++++++++++++++++++++++++++++++--
 fs/fuse/dev_uring_i.h | 13 +++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Josef Bacik May 30, 2024, 7 p.m. UTC | #1
On Wed, May 29, 2024 at 08:00:44PM +0200, Bernd Schubert wrote:
> fuse-over-io-uring needs an implicit device clone, which is done per
> queue to avoid hanging "umount" when daemon side is already terminated.
> Reason is that fuse_dev_release() is not called when there are queued
> (waiting) io_uring commands.
> Solution is the implicit device clone and an exception in fuse_dev_release
> for uring devices to abort the connection when only uring device
> are left.
> 
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev.c         | 32 ++++++++++++++++++++++++++++++--
>  fs/fuse/dev_uring_i.h | 13 +++++++++++++
>  2 files changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 78c05516da7f..cd5dc6ae9272 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -2257,6 +2257,8 @@ int fuse_dev_release(struct inode *inode, struct file *file)
>  		struct fuse_pqueue *fpq = &fud->pq;
>  		LIST_HEAD(to_end);
>  		unsigned int i;
> +		int dev_cnt;
> +		bool abort_conn = false;
>  
>  		spin_lock(&fpq->lock);
>  		WARN_ON(!list_empty(&fpq->io));
> @@ -2266,8 +2268,34 @@ int fuse_dev_release(struct inode *inode, struct file *file)
>  
>  		fuse_dev_end_requests(&to_end);
>  
> -		/* Are we the last open device? */
> -		if (atomic_dec_and_test(&fc->dev_count)) {
> +		/* Are we the last open device?  */
> +		dev_cnt = atomic_dec_return(&fc->dev_count);
> +		if (dev_cnt == 0)
> +			abort_conn = true;

You can just do

if (atomic_dec_and_test(&fc->dev_count))
	abort_conn = true;
else if (fuse_uring_configured(fc))
	abort_conn = fuse_uring_empty(fc);

and have fuse_uring_empty() do the work below to find if we're able to abort the
connection, so it's in its own little helper.

> +
> +		/*
> +		 * Or is this with io_uring and only ring devices left?
> +		 * These devices will not receive a ->release() as long as
> +		 * there are io_uring_cmd's waiting and not completed
> +		 * with io_uring_cmd_done yet
> +		 */
> +		if (fuse_uring_configured(fc)) {
> +			struct fuse_dev *list_dev;
> +			bool all_uring = true;
> +
> +			spin_lock(&fc->lock);
> +			list_for_each_entry(list_dev, &fc->devices, entry) {
> +				if (list_dev == fud)
> +					continue;
> +				if (!list_dev->uring_dev)
> +					all_uring = false;
> +			}
> +			spin_unlock(&fc->lock);
> +			if (all_uring)
> +				abort_conn = true;
> +		}
> +
> +		if (abort_conn) {
>  			WARN_ON(fc->iq.fasync != NULL);
>  			fuse_abort_conn(fc);
>  		}
> diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
> index 7a2f540d3ea5..114e9c008013 100644
> --- a/fs/fuse/dev_uring_i.h
> +++ b/fs/fuse/dev_uring_i.h
> @@ -261,6 +261,14 @@ fuse_uring_get_queue(struct fuse_ring *ring, int qid)
>  	return (struct fuse_ring_queue *)(ptr + qid * ring->queue_size);
>  }
>  
> +static inline bool fuse_uring_configured(struct fuse_conn *fc)
> +{
> +	if (READ_ONCE(fc->ring) != NULL && fc->ring->configured)
> +		return true;

I see what you're trying to do here, and it is safe because you won't drop
fc->ring at this point, but it gives the illusion that it'll work if we race
with somebody who is freeing fc->ring, which isn't the case because you
immediately de-reference it again afterwards.

Using READ_ONCE/WRITE_ONCE for pointer access isn't actually safe unless you're
documenting it specifically; don't use it unless you really need lockless access
to the thing.

If we know that having fc means that fc->ring will be valid at all times, then
the READ_ONCE is redundant and unnecessary; if we don't know that, then this
needs more protection to make sure we don't suddenly lose fc->ring between the
two statements.

AFAICT if we have fc then ->ring will either be NULL or it won't be (once the
connection is established and running), so it's fine to just delete the
READ_ONCE/WRITE_ONCE things.  Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 78c05516da7f..cd5dc6ae9272 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2257,6 +2257,8 @@  int fuse_dev_release(struct inode *inode, struct file *file)
 		struct fuse_pqueue *fpq = &fud->pq;
 		LIST_HEAD(to_end);
 		unsigned int i;
+		int dev_cnt;
+		bool abort_conn = false;
 
 		spin_lock(&fpq->lock);
 		WARN_ON(!list_empty(&fpq->io));
@@ -2266,8 +2268,34 @@  int fuse_dev_release(struct inode *inode, struct file *file)
 
 		fuse_dev_end_requests(&to_end);
 
-		/* Are we the last open device? */
-		if (atomic_dec_and_test(&fc->dev_count)) {
+		/* Are we the last open device?  */
+		dev_cnt = atomic_dec_return(&fc->dev_count);
+		if (dev_cnt == 0)
+			abort_conn = true;
+
+		/*
+		 * Or is this with io_uring and only ring devices left?
+		 * These devices will not receive a ->release() as long as
+		 * there are io_uring_cmd's waiting and not completed
+		 * with io_uring_cmd_done yet
+		 */
+		if (fuse_uring_configured(fc)) {
+			struct fuse_dev *list_dev;
+			bool all_uring = true;
+
+			spin_lock(&fc->lock);
+			list_for_each_entry(list_dev, &fc->devices, entry) {
+				if (list_dev == fud)
+					continue;
+				if (!list_dev->uring_dev)
+					all_uring = false;
+			}
+			spin_unlock(&fc->lock);
+			if (all_uring)
+				abort_conn = true;
+		}
+
+		if (abort_conn) {
 			WARN_ON(fc->iq.fasync != NULL);
 			fuse_abort_conn(fc);
 		}
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 7a2f540d3ea5..114e9c008013 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -261,6 +261,14 @@  fuse_uring_get_queue(struct fuse_ring *ring, int qid)
 	return (struct fuse_ring_queue *)(ptr + qid * ring->queue_size);
 }
 
+static inline bool fuse_uring_configured(struct fuse_conn *fc)
+{
+	if (READ_ONCE(fc->ring) != NULL && fc->ring->configured)
+		return true;
+
+	return false;
+}
+
 #else /* CONFIG_FUSE_IO_URING */
 
 struct fuse_ring;
@@ -274,6 +282,11 @@  static inline void fuse_uring_conn_destruct(struct fuse_conn *fc)
 {
 }
 
+static inline bool fuse_uring_configured(struct fuse_conn *fc)
+{
+	return false;
+}
+
 #endif /* CONFIG_FUSE_IO_URING */
 
 #endif /* _FS_FUSE_DEV_URING_I_H */