diff mbox series

[v5,04/17] fs: Restore F_[GS]ET_FILE_RW_HINT support

Message ID 20231130013322.175290-5-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Pass data lifetime information to SCSI disk devices | expand

Commit Message

Bart Van Assche Nov. 30, 2023, 1:33 a.m. UTC
Revert commit 7b12e49669c9 ("fs: remove fs.f_write_hint") to enable testing
write hint support with fio and direct I/O.

Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 fs/fcntl.c         | 17 +++++++++++++++++
 fs/open.c          |  1 +
 include/linux/fs.h |  9 +++++++++
 3 files changed, 27 insertions(+)

Comments

Christoph Hellwig Dec. 7, 2023, 5:46 p.m. UTC | #1
On Wed, Nov 29, 2023 at 05:33:09PM -0800, Bart Van Assche wrote:
> Revert commit 7b12e49669c9 ("fs: remove fs.f_write_hint") to enable testing
> write hint support with fio and direct I/O.

To enable testing seems like a pretty bad argument for bloating struct
file.  I'd much prefer to not restore it, but if you want to do so
please write a convincing commit log.
Bart Van Assche Dec. 7, 2023, 7:37 p.m. UTC | #2
On 12/7/23 07:46, Christoph Hellwig wrote:
> On Wed, Nov 29, 2023 at 05:33:09PM -0800, Bart Van Assche wrote:
>> Revert commit 7b12e49669c9 ("fs: remove fs.f_write_hint") to enable testing
>> write hint support with fio and direct I/O.
> 
> To enable testing seems like a pretty bad argument for bloating struct
> file.  I'd much prefer to not restore it, but if you want to do so
> please write a convincing commit log.

Hi Christoph,

I have submitted a pull request for fio such that my tests can be run
even if F_SET_FILE_RW_HINT is not supported (see also
https://github.com/axboe/fio/pull/1682).

The only other application that I found that uses F_SET_FILE_RW_HINT is
Ceph. Do we want to make the Ceph code work again that uses
F_SET_FILE_RW_HINT? I think this code cannot be converted to
F_SET_RW_HINT.

 From the Ceph source code:

----------------------------------------------------------------------
int KernelDevice::choose_fd(bool buffered, int write_hint) const
{
#if defined(F_SET_FILE_RW_HINT)
   if (!enable_wrt)
     write_hint = WRITE_LIFE_NOT_SET;
#else
   // Without WRITE_LIFE capabilities, only one file is used.
   // And rocksdb sets this value also to > 0, so we need to catch this
   // here instead of trusting rocksdb to set write_hint.
   write_hint = WRITE_LIFE_NOT_SET;
#endif
   return buffered ? fd_buffereds[write_hint] : fd_directs[write_hint];
}
----------------------------------------------------------------------

Thanks,

Bart.
Christoph Hellwig Dec. 11, 2023, 4:45 p.m. UTC | #3
On Thu, Dec 07, 2023 at 09:37:44AM -1000, Bart Van Assche wrote:
> I have submitted a pull request for fio such that my tests can be run
> even if F_SET_FILE_RW_HINT is not supported (see also
> https://github.com/axboe/fio/pull/1682).
>
> The only other application that I found that uses F_SET_FILE_RW_HINT is
> Ceph. Do we want to make the Ceph code work again that uses
> F_SET_FILE_RW_HINT? I think this code cannot be converted to
> F_SET_RW_HINT.

Well, let's pull a few folks in.  I'd personally prefer not carrying
this around in the file and supporting different write hints in the
same inode, which also makes things a mess for file systems.
diff mbox series

Patch

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 891a9ebcdef1..fe80e19f1c1a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -292,6 +292,21 @@  static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 	u64 hint;
 
 	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		hint = file_write_hint(file);
+		if (copy_to_user(argp, &hint, sizeof(*argp)))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (copy_from_user(&hint, argp, sizeof(hint)))
+			return -EFAULT;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
 	case F_GET_RW_HINT:
 		hint = inode->i_write_hint;
 		if (copy_to_user(argp, &hint, sizeof(*argp)))
@@ -416,6 +431,8 @@  static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		break;
 	case F_GET_RW_HINT:
 	case F_SET_RW_HINT:
+	case F_GET_FILE_RW_HINT:
+	case F_SET_FILE_RW_HINT:
 		err = fcntl_rw_hint(filp, cmd, arg);
 		break;
 	default:
diff --git a/fs/open.c b/fs/open.c
index 02dc608d40d8..4c5c29541ac5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -961,6 +961,7 @@  static int do_dentry_open(struct file *f,
 	if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
 		f->f_mode |= FMODE_CAN_ODIRECT;
 
+	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 	f->f_iocb_flags = iocb_flags(f);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a08014b68d6e..a6e0c4b5a72b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -989,6 +989,7 @@  struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
+	enum rw_hint		f_write_hint;
 	fmode_t			f_mode;
 	atomic_long_t		f_count;
 	struct mutex		f_pos_lock;
@@ -2162,6 +2163,14 @@  static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
 	       !vfsgid_valid(i_gid_into_vfsgid(idmap, inode));
 }
 
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+		return file->f_write_hint;
+
+	return file_inode(file)->i_write_hint;
+}
+
 static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 {
 	*kiocb = (struct kiocb) {