diff mbox series

[4/4] fuse: introduce inode io modes

Message ID 20231224104914.49316-5-bschubert@ddn.com (mailing list archive)
State New
Headers show
Series fuse: inode IO modes and mmap | expand

Commit Message

Bernd Schubert Dec. 24, 2023, 10:49 a.m. UTC
From: Amir Goldstein <amir73il@gmail.com>

The fuse inode io mode is determined by the mode of its open files/mmaps
and parallel dio.

- caching io mode - files open in caching mode or mmap on direct_io file
- direct io mode - no files open in caching mode and no files mmapped
- parallel dio mode - direct io mode with parallel dio in progress

We use a new FOPEN_CACHE_IO flag to explicitly mark a file that was opened
in caching mode.

direct_io mmap uses page cache, so first mmap will mark the file as
FOPEN_DIRECT_IO|FOPEN_CACHE_IO (i.e. mixed mode) and inode will enter
the caching io mode.

If the server opens the file with flags FOPEN_DIRECT_IO|FOPEN_CACHE_IO,
the inode enters caching io mode already on open.

This allows executing parallel dio when inode is not in caching mode
even if shared mmap is allowed, but no mmaps have been performed on
the inode in question.

An mmap on direct_io file now waits for in-progress parallel dio writes,
so FOPEN_PARALLEL_DIRECT_WRITES is enabled again by this commit.

Open in caching mode falls back to direct io mode if parallel dio is
in progress.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/fuse/file.c            | 160 ++++++++++++++++++++++++++++++++++++--
 fs/fuse/fuse_i.h          |  76 +++++++++++++++++-
 include/uapi/linux/fuse.h |   2 +
 3 files changed, 230 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index abc93415ec7e3..fb0b571daaf55 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -104,10 +104,100 @@  static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
 	kfree(ra);
 }
 
+static bool fuse_file_is_direct_io(struct file *file)
+{
+	const struct fuse_file *ff = file->private_data;
+	/* Direct io is requested by the opener (O_DIRECT) or by the server */
+	return (file->f_flags & O_DIRECT) || (ff->open_flags & FOPEN_DIRECT_IO);
+}
+
+/*
+ * Request access to submit new io to inode via open file.
+ * Returns false if a caching open had to fall back to direct io mode.
+ */
+static bool fuse_file_io_open(struct file *file, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	struct fuse_file *ff = file->private_data;
+	bool granted = true;
+
+	if (!S_ISREG(inode->i_mode) || FUSE_IS_DAX(inode))
+		return granted;
+
+	/* A file that is not direct io is explicitly marked FOPEN_CACHE_IO */
+	if (!fuse_file_is_direct_io(file))
+		ff->open_flags |= FOPEN_CACHE_IO;
+
+	spin_lock(&fi->lock);
+	/* Caching opens must be admitted into the caching inode io mode */
+	if ((ff->open_flags & FOPEN_CACHE_IO) && !fuse_inode_get_io_cache(fi)) {
+		pr_debug("failed to open file in caching mode; falling back to direct io mode.\n");
+		ff->open_flags &= ~FOPEN_CACHE_IO;
+		ff->open_flags |= FOPEN_DIRECT_IO;
+		granted = false;
+	}
+	spin_unlock(&fi->lock);
+
+	return granted;
+}
+
+/* Request access to submit new io to inode via mmap; returns 0 or -ENODEV */
+static int fuse_file_io_mmap(struct fuse_file *ff, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	if (WARN_ON(!S_ISREG(inode->i_mode) || FUSE_IS_DAX(inode)))
+		return -ENODEV;
+
+	spin_lock(&fi->lock);
+	/*
+	 * First mmap of a direct_io file enters caching inode io mode: block
+	 * new parallel dio writes and wait for in-progress ones to complete.
+	 */
+	if (!(ff->open_flags & FOPEN_CACHE_IO)) {
+		while (!fuse_inode_get_io_cache(fi)) {
+			/*
+			 * Setting the bit advises new direct-io writes to use
+			 * an exclusive lock, so that the wait below ends.
+			 * Wait uninterruptibly: an interrupted wait would
+			 * return at once and turn this loop into a busy spin.
+			 */
+			set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+			spin_unlock(&fi->lock);
+			wait_event(fi->direct_io_waitq,
+				   fuse_is_io_cache_allowed(fi));
+			spin_lock(&fi->lock);
+		}
+		ff->open_flags |= FOPEN_CACHE_IO;
+	}
+	spin_unlock(&fi->lock);
+
+	return 0;
+}
+
+/* Called once no io is pending and none can start via this open/mmapped file */
+static void fuse_file_io_release(struct fuse_file *ff, struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+
+	if (!(S_ISREG(inode->i_mode) && !FUSE_IS_DAX(inode)))
+		return;
+
+	/* A caching file drops its reference; the last drop exits caching mode */
+	spin_lock(&fi->lock);
+	if (ff->open_flags & FOPEN_CACHE_IO)
+		fuse_inode_put_io_cache(fi);
+	spin_unlock(&fi->lock);
+}
+
 static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 {
 	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_args *args = &ff->release_args->args;
+		struct inode *inode = ff->release_args->inode;
+
+		if (inode)
+			fuse_file_io_release(ff, inode);
 
 		if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
 			/* Do nothing when client does not implement 'open' */
@@ -199,6 +289,9 @@  void fuse_finish_open(struct inode *inode, struct file *file)
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
+	/* The file open mode determines the inode io mode */
+	fuse_file_io_open(file, inode);
+
 	if (ff->open_flags & FOPEN_STREAM)
 		stream_open(inode, file);
 	else if (ff->open_flags & FOPEN_NONSEEKABLE)
@@ -1305,6 +1398,37 @@  static bool fuse_io_past_eof(struct kiocb *iocb, struct iov_iter *iter)
 	return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
 }
 
+/*
+ * New parallel dio is allowed only while the inode is not in caching mode;
+ * once started, it denies new opens in caching mode.
+ */
+static bool fuse_file_shared_dio_start(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool started = false;
+
+	if (!WARN_ON(!S_ISREG(inode->i_mode) || FUSE_IS_DAX(inode))) {
+		spin_lock(&fi->lock);
+		started = fuse_inode_deny_io_cache(fi);
+		spin_unlock(&fi->lock);
+	}
+
+	return started;
+}
+
+/* End one parallel dio; the last one to end wakes waiters for caching mode */
+static void fuse_file_shared_dio_end(struct inode *inode)
+{
+	struct fuse_inode *fi = get_fuse_inode(inode);
+	bool last_dio_ended;
+
+	spin_lock(&fi->lock);
+	last_dio_ended = fuse_inode_allow_io_cache(fi);
+	spin_unlock(&fi->lock);
+	if (last_dio_ended)
+		wake_up(&fi->direct_io_waitq);
+}
+
 /*
  * @return true if an exclusive lock for direct IO writes is needed
  */
@@ -1313,6 +1437,7 @@  static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
 	struct inode *inode = file_inode(iocb->ki_filp);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	/* server side has to advise that it supports parallel dio writes */
 	if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
@@ -1324,11 +1449,9 @@  static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from
 	if (iocb->ki_flags & IOCB_APPEND)
 		return true;
 
-	/* combination opf page access and direct-io difficult, shared
-	 * locks actually introduce a conflict.
-	 */
-	if (get_fuse_conn(inode)->direct_io_allow_mmap)
-		return true;
+	/* inode has or wants page cache IO: the exclusive lock is needed */
+	if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+		return true;
 
 	/* parallel dio beyond eof is at least for now not supported */
 	if (fuse_io_past_eof(iocb, from))
@@ -1349,9 +1472,11 @@  static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
 		inode_lock_shared(inode);
 		/*
 		 * Previous check was without inode lock and might have raced,
-		 * check again.
+		 * check again. fuse_file_shared_dio_start() should be performed
+		 * only after taking shared inode lock.
 		 */
-		if (fuse_io_past_eof(iocb, from)) {
+		if (fuse_io_past_eof(iocb, from) ||
+		    !fuse_file_shared_dio_start(inode)) {
 			inode_unlock_shared(inode);
 			inode_lock(inode);
 			*exclusive = true;
@@ -1364,6 +1489,7 @@  static void fuse_dio_unlock(struct inode *inode, bool exclusive)
 	if (exclusive) {
 		inode_unlock(inode);
 	} else {
+		fuse_file_shared_dio_end(inode);
 		inode_unlock_shared(inode);
 	}
 }
@@ -2493,11 +2619,16 @@  static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct fuse_file *ff = file->private_data;
 	struct fuse_conn *fc = ff->fm->fc;
+	int rc;
 
 	/* DAX mmap is superior to direct_io mmap */
 	if (FUSE_IS_DAX(file_inode(file)))
 		return fuse_dax_mmap(file, vma);
 
+	/*
+	 * FOPEN_DIRECT_IO handling is special compared to O_DIRECT, as it
+	 * does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+	 */
 	if (ff->open_flags & FOPEN_DIRECT_IO) {
 		/*
 		 * Can't provide the coherency needed for MAP_SHARED
@@ -2508,10 +2639,23 @@  static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 		invalidate_inode_pages2(file->f_mapping);
 
+		/*
+		 * First mmap of direct_io file enters caching inode io mode.
+		 * Also waits for parallel dio writers to go into serial mode
+		 * (exclusive instead of shared lock).
+		 */
+		rc = fuse_file_io_mmap(ff, file_inode(file));
+		if (rc)
+			return rc;
+
 		if (!(vma->vm_flags & VM_MAYSHARE)) {
 			/* MAP_PRIVATE */
 			return generic_file_mmap(file, vma);
 		}
+	} else if (file->f_flags & O_DIRECT) {
+		rc = fuse_file_io_mmap(ff, file_inode(file));
+		if (rc)
+			return rc;
 	}
 
 	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
@@ -3280,7 +3424,9 @@  void fuse_init_file_inode(struct inode *inode, unsigned int flags)
 	INIT_LIST_HEAD(&fi->write_files);
 	INIT_LIST_HEAD(&fi->queued_writes);
 	fi->writectr = 0;
+	fi->iocachectr = 0;
 	init_waitqueue_head(&fi->page_waitq);
+	init_waitqueue_head(&fi->direct_io_waitq);
 	fi->writepages = RB_ROOT;
 
 	if (IS_ENABLED(CONFIG_FUSE_DAX))
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1df83eebda927..5774585f6de3e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -111,7 +111,7 @@  struct fuse_inode {
 	u64 attr_version;
 
 	union {
-		/* Write related fields (regular file only) */
+		/* read/write io cache (regular file only) */
 		struct {
 			/* Files usable in writepage.  Protected by fi->lock */
 			struct list_head write_files;
@@ -123,9 +123,15 @@  struct fuse_inode {
 			 * (FUSE_NOWRITE) means more writes are blocked */
 			int writectr;
 
+			/** >0: files/maps using page cache, <0: parallel dio */
+			int iocachectr;
+
 			/* Waitq for writepage completion */
 			wait_queue_head_t page_waitq;
 
+			/* waitq for direct-io completion */
+			wait_queue_head_t direct_io_waitq;
+
 			/* List of writepage requestst (pending or sent) */
 			struct rb_root writepages;
 		};
@@ -187,6 +193,8 @@  enum {
 	FUSE_I_BAD,
 	/* Has btime */
 	FUSE_I_BTIME,
+	/* Wants or already has page cache IO */
+	FUSE_I_CACHE_IO_MODE,
 };
 
 struct fuse_conn;
@@ -1349,6 +1357,72 @@  int fuse_fileattr_set(struct mnt_idmap *idmap,
 		      struct dentry *dentry, struct fileattr *fa);
 
 /* file.c */
+/*
+ * Take a caching io reference on the inode.
+ * Return false, taking nothing, while the counter is held negative.
+ */
+static inline bool fuse_inode_get_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (fi->iocachectr < 0)
+		return false;
+
+	/* The first reference marks the inode as being in caching io mode */
+	if (fi->iocachectr++ == 0)
+		set_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+	return true;
+}
+
+/*
+ * Drop a caching io reference on the inode.
+ * Return true if it was the last one and the inode left caching io mode.
+ */
+static inline bool fuse_inode_put_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (WARN_ON(fi->iocachectr <= 0))
+		return false;
+
+	fi->iocachectr--;
+	if (fi->iocachectr != 0)
+		return false;
+
+	clear_bit(FUSE_I_CACHE_IO_MODE, &fi->state);
+	return true;
+}
+
+/*
+ * Deny new caching opens on behalf of one more user; false if already caching.
+ */
+static inline bool fuse_inode_deny_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (fi->iocachectr <= 0) {
+		fi->iocachectr--;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Drop one request to deny opens in caching mode.
+ * Return true if it was the last, i.e. caching opens are allowed again.
+ */
+static inline bool fuse_inode_allow_io_cache(struct fuse_inode *fi)
+{
+	assert_spin_locked(&fi->lock);
+	if (WARN_ON(fi->iocachectr >= 0))
+		return false;
+	return fi->iocachectr++ == -1;
+}
+
+/*
+ * Lockless check: true unless new opens in caching mode are currently denied.
+ */
+static inline bool fuse_is_io_cache_allowed(struct fuse_inode *fi)
+{
+	return !(READ_ONCE(fi->iocachectr) < 0);
+}
 
 struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
 				 unsigned int open_flags, bool isdir);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index e7418d15fe390..66a4bd8d767d4 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -353,6 +353,7 @@  struct fuse_file_lock {
  * FOPEN_STREAM: the file is stream-like (no file position at all)
  * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE)
  * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode
+ * FOPEN_CACHE_IO: using cache for this open file (incl. mmap on direct_io)
  */
 #define FOPEN_DIRECT_IO		(1 << 0)
 #define FOPEN_KEEP_CACHE	(1 << 1)
@@ -361,6 +362,7 @@  struct fuse_file_lock {
 #define FOPEN_STREAM		(1 << 4)
 #define FOPEN_NOFLUSH		(1 << 5)
 #define FOPEN_PARALLEL_DIRECT_WRITES	(1 << 6)
+#define FOPEN_CACHE_IO		(1 << 7)
 
 /**
  * INIT request/reply flags