diff mbox

[1/9] fs: add fcntl() interface for setting/getting write life time hints

Message ID ec6ab0fa-60d7-3aa5-aa7c-81c105bdd488@kernel.dk (mailing list archive)
State New, archived
Headers show

Commit Message

Jens Axboe June 27, 2017, 3:09 p.m. UTC
On 06/27/2017 08:42 AM, Christoph Hellwig wrote:
> The API looks ok, but the code could use some cleanups.  What do
> you think about the incremental patch below:
> 
> It refactors various manipulations, and stores the write hint right
> in the iocb as there is a 4 byte hole (this will need some minor
> adjustments in the next patches):

How's this? Fixes for compile, and also squeeze an enum rw_hint into
a hole in the inode structure.

I'll refactor around this and squeeze into bio/rq holes as well, then
re-test it.

Comments

Christoph Hellwig June 27, 2017, 3:16 p.m. UTC | #1
On Tue, Jun 27, 2017 at 09:09:48AM -0600, Jens Axboe wrote:
> On 06/27/2017 08:42 AM, Christoph Hellwig wrote:
> > The API looks ok, but the code could use some cleanups.  What do
> > you think about the incremental patch below:
> > 
> > It refactors various manipulations, and stores the write hint right
> > in the iocb as there is a 4 byte hole (this will need some minor
> > adjustments in the next patches):
> 
> How's this? Fixes for compile, and also squeeze an enum rw_hint into
> a hole in the inode structure.
> 
> I'll refactor around this and squeeze into bio/rq holes as well, then
> re-test it.

Looks good, minor nitpick below:

> index 4574121f4746..4587a181162e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -265,6 +265,20 @@ struct page;
>  struct address_space;
>  struct writeback_control;
>  
> +#include <linux/fcntl.h>

I didn't seem to need the move.  But if you want to move it can
we keep all the includes together at the very top?

> +static inline enum rw_hint __inode_write_hint(struct inode *inode)
> +{
> +	return inode->i_write_hint;
> +}
> +
> +static inline enum rw_hint inode_write_hint(struct inode *inode)
> +{
> +	enum rw_hint ret = __inode_write_hint(inode);
> +	if (ret != WRITE_LIFE_NOT_SET)
> +		return ret;
> +	return WRITE_LIFE_NONE;
> +}
> +
> +static inline enum rw_hint __file_write_hint(struct file *file)
> +{
> +	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
> +		return file->f_write_hint;
> +
> +	return __inode_write_hint(file_inode(file));
> +}
> +
> +static inline enum rw_hint file_write_hint(struct file *file)
> +{
> +	enum rw_hint ret = __file_write_hint(file);
> +	if (ret != WRITE_LIFE_NOT_SET)
> +		return ret;
> +	return WRITE_LIFE_NONE;
> +}

I'd say kill all these helpers and just treat both WRITE_LIFE_NONE
and WRITE_LIFE_NOT_SET special all the way down in NVMe.
Jens Axboe June 27, 2017, 3:18 p.m. UTC | #2
On 06/27/2017 09:16 AM, Christoph Hellwig wrote:
> On Tue, Jun 27, 2017 at 09:09:48AM -0600, Jens Axboe wrote:
>> On 06/27/2017 08:42 AM, Christoph Hellwig wrote:
>>> The API looks ok, but the code could use some cleanups.  What do
>>> you think about the incremental patch below:
>>>
>>> It refactors various manipulations, and stores the write hint right
>>> in the iocb as there is a 4 byte hole (this will need some minor
>>> adjustments in the next patches):
>>
>> How's this? Fixes for compile, and also squeeze an enum rw_hint into
>> a hole in the inode structure.
>>
>> I'll refactor around this and squeeze into bio/rq holes as well, then
>> re-test it.
> 
> Looks good, minor nitpick below:
> 
>> index 4574121f4746..4587a181162e 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -265,6 +265,20 @@ struct page;
>>  struct address_space;
>>  struct writeback_control;
>>  
>> +#include <linux/fcntl.h>
> 
> I didn't seem to need the move.  But if you want to move it can
> we keep all the includes together at the very top?

It did here, we need it for the RWH_ defines or my compile blows up.
But yeah, let's just move it to the top, not sure why it's in the
middle.

>> +static inline enum rw_hint __inode_write_hint(struct inode *inode)
>> +{
>> +	return inode->i_write_hint;
>> +}
>> +
>> +static inline enum rw_hint inode_write_hint(struct inode *inode)
>> +{
>> +	enum rw_hint ret = __inode_write_hint(inode);
>> +	if (ret != WRITE_LIFE_NOT_SET)
>> +		return ret;
>> +	return WRITE_LIFE_NONE;
>> +}
>> +
>> +static inline enum rw_hint __file_write_hint(struct file *file)
>> +{
>> +	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
>> +		return file->f_write_hint;
>> +
>> +	return __inode_write_hint(file_inode(file));
>> +}
>> +
>> +static inline enum rw_hint file_write_hint(struct file *file)
>> +{
>> +	enum rw_hint ret = __file_write_hint(file);
>> +	if (ret != WRITE_LIFE_NOT_SET)
>> +		return ret;
>> +	return WRITE_LIFE_NONE;
>> +}
> 
> I'd say kill all these helpers and just treat both WRITE_LIFE_NONE
> and WRITE_LIFE_NOT_SET special all the way down in NVMe.

Sure, we can do that.
diff mbox

Patch

diff --git a/fs/fcntl.c b/fs/fcntl.c
index f4e7267d117f..25f96a101f1a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -243,6 +243,62 @@  static int f_getowner_uids(struct file *filp, unsigned long arg)
 }
 #endif
 
+static bool rw_hint_valid(enum rw_hint hint)
+{
+	switch (hint) {
+	case RWF_WRITE_LIFE_NOT_SET:
+	case RWH_WRITE_LIFE_NONE:
+	case RWH_WRITE_LIFE_SHORT:
+	case RWH_WRITE_LIFE_MEDIUM:
+	case RWH_WRITE_LIFE_LONG:
+	case RWH_WRITE_LIFE_EXTREME:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static long fcntl_rw_hint(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct inode *inode = file_inode(file);
+	u64 *argp = (u64 __user *)arg;
+	enum rw_hint hint;
+
+	switch (cmd) {
+	case F_GET_FILE_RW_HINT:
+		if (put_user(__file_write_hint(file), argp))
+			return -EFAULT;
+		return 0;
+	case F_SET_FILE_RW_HINT:
+		if (get_user(hint, argp))
+			return -EFAULT;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		spin_lock(&file->f_lock);
+		file->f_write_hint = hint;
+		spin_unlock(&file->f_lock);
+		return 0;
+	case F_GET_RW_HINT:
+		if (put_user(__inode_write_hint(inode), argp))
+			return -EFAULT;
+		return 0;
+	case F_SET_RW_HINT:
+		if (get_user(hint, argp))
+			return -EFAULT;
+		if (!rw_hint_valid(hint))
+			return -EINVAL;
+
+		inode_lock(inode);
+		inode->i_write_hint = hint;
+		inode_unlock(inode);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -337,6 +393,12 @@  static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GET_SEALS:
 		err = shmem_fcntl(filp, cmd, arg);
 		break;
+	case F_GET_RW_HINT:
+	case F_SET_RW_HINT:
+	case F_GET_FILE_RW_HINT:
+	case F_SET_FILE_RW_HINT:
+		err = fcntl_rw_hint(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/fs/inode.c b/fs/inode.c
index db5914783a71..f0e5fc77e6a4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -146,6 +146,7 @@  int inode_init_always(struct super_block *sb, struct inode *inode)
 	i_gid_write(inode, 0);
 	atomic_set(&inode->i_writecount, 0);
 	inode->i_size = 0;
+	inode->i_write_hint = WRITE_LIFE_NOT_SET;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..3fe0c4aa7d27 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -759,6 +759,7 @@  static int do_dentry_open(struct file *f,
 	     likely(f->f_op->write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
+	f->f_write_hint = WRITE_LIFE_NOT_SET;
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4574121f4746..4587a181162e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -265,6 +265,20 @@  struct page;
 struct address_space;
 struct writeback_control;
 
+#include <linux/fcntl.h>
+
+/*
+ * Write life time hint values.
+ */
+enum rw_hint {
+	WRITE_LIFE_NOT_SET	= 0,
+	WRITE_LIFE_NONE		= RWH_WRITE_LIFE_NONE,
+	WRITE_LIFE_SHORT	= RWH_WRITE_LIFE_SHORT,
+	WRITE_LIFE_MEDIUM	= RWH_WRITE_LIFE_MEDIUM,
+	WRITE_LIFE_LONG		= RWH_WRITE_LIFE_LONG,
+	WRITE_LIFE_EXTREME	= RWH_WRITE_LIFE_EXTREME,
+};
+
 #define IOCB_EVENTFD		(1 << 0)
 #define IOCB_APPEND		(1 << 1)
 #define IOCB_DIRECT		(1 << 2)
@@ -280,6 +294,7 @@  struct kiocb {
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
+	enum rw_hint		ki_hint;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -597,6 +612,7 @@  struct inode {
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
 	unsigned int		i_blkbits;
+	enum rw_hint		i_write_hint;
 	blkcnt_t		i_blocks;
 
 #ifdef __NEED_I_SIZE_ORDERED
@@ -851,6 +867,7 @@  struct file {
 	 * Must not be taken from IRQ context.
 	 */
 	spinlock_t		f_lock;
+	enum rw_hint		f_write_hint;
 	atomic_long_t		f_count;
 	unsigned int 		f_flags;
 	fmode_t			f_mode;
@@ -1026,8 +1043,6 @@  struct file_lock_context {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
-#include <linux/fcntl.h>
-
 extern void send_sigio(struct fown_struct *fown, int fd, int band);
 
 /*
@@ -1878,6 +1893,35 @@  static inline bool HAS_UNMAPPED_ID(struct inode *inode)
 	return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
 }
 
+static inline enum rw_hint __inode_write_hint(struct inode *inode)
+{
+	return inode->i_write_hint;
+}
+
+static inline enum rw_hint inode_write_hint(struct inode *inode)
+{
+	enum rw_hint ret = __inode_write_hint(inode);
+	if (ret != WRITE_LIFE_NOT_SET)
+		return ret;
+	return WRITE_LIFE_NONE;
+}
+
+static inline enum rw_hint __file_write_hint(struct file *file)
+{
+	if (file->f_write_hint != WRITE_LIFE_NOT_SET)
+		return file->f_write_hint;
+
+	return __inode_write_hint(file_inode(file));
+}
+
+static inline enum rw_hint file_write_hint(struct file *file)
+{
+	enum rw_hint ret = __file_write_hint(file);
+	if (ret != WRITE_LIFE_NOT_SET)
+		return ret;
+	return WRITE_LIFE_NONE;
+}
+
 /*
  * Inode state bits.  Protected by inode->i_lock
  *
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 813afd6eee71..ec69d55bcec7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -43,6 +43,27 @@ 
 /* (1U << 31) is reserved for signed error codes */
 
 /*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT		(F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET	0
+#define RWH_WRITE_LIFE_NONE	1
+#define RWH_WRITE_LIFE_SHORT	2
+#define RWH_WRITE_LIFE_MEDIUM	3
+#define RWH_WRITE_LIFE_LONG	4
+#define RWH_WRITE_LIFE_EXTREME	5
+
+/*
  * Types of directory notifications that may be requested.
  */
 #define DN_ACCESS	0x00000001	/* File accessed */