@@ -334,28 +334,6 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
-static bool rw_hint_valid(u64 hint)
-{
- BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
- BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
- BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
- BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
- BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
- BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
-
- switch (hint) {
- case RWH_WRITE_LIFE_NOT_SET:
- case RWH_WRITE_LIFE_NONE:
- case RWH_WRITE_LIFE_SHORT:
- case RWH_WRITE_LIFE_MEDIUM:
- case RWH_WRITE_LIFE_LONG:
- case RWH_WRITE_LIFE_EXTREME:
- return true;
- default:
- return false;
- }
-}
-
static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -21,4 +21,28 @@ enum rw_hint {
static_assert(sizeof(enum rw_hint) == 1);
#endif
+#define WRITE_LIFE_INVALID (RWH_WRITE_LIFE_EXTREME + 1) /* marks "no per-io hint set" */
+
+static inline bool rw_hint_valid(u64 hint)
+{
+ BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET); /* enums must agree */
+ BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
+ BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
+ BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
+ BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
+ BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
+ /* Return true only when @hint is one of the six RWH_WRITE_LIFE_* values. */
+ switch (hint) {
+ case RWH_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NONE:
+ case RWH_WRITE_LIFE_SHORT:
+ case RWH_WRITE_LIFE_MEDIUM:
+ case RWH_WRITE_LIFE_LONG:
+ case RWH_WRITE_LIFE_EXTREME:
+ return true;
+ default: /* anything else is rejected */
+ return false;
+ }
+}
+
#endif /* _LINUX_RW_HINT_H */
@@ -92,12 +92,23 @@ struct io_uring_sqe {
__u16 addr_len;
__u16 __pad3[1];
};
+ struct {
+ /* Bitmask of META_TYPE_* flags; 16 bits, one per meta type */
+ __u16 meta_type;
+ __u16 __pad4[1]; /* must be zero when meta_type is set */
+ };
};
union {
struct {
__u64 addr3;
__u64 __pad2[1];
};
+ struct {
+ /* Field for the first meta type, META_TYPE_LIFETIME_HINT */
+ __u64 lifetime_val;
+ /* For future use; must be zero when meta_type is set */
+ __u64 __pad5[1];
+ };
__u64 optval;
/*
* If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +118,14 @@ struct io_uring_sqe {
};
};
+enum io_uring_sqe_meta_type_bits {
+ META_TYPE_LIFETIME_HINT_BIT /* bit position within sqe->meta_type */
+};
+
+/* meta_type flag: lifetime_val holds a write hint value supported by F_SET_RW_HINT fcntl */
+#define META_TYPE_LIFETIME_HINT (1U << META_TYPE_LIFETIME_HINT_BIT)
+
+
/*
* If sqe->file_index is set to this for opcodes that instantiate a new
* direct descriptor (like openat/openat2/accept), then io_uring will allocate
@@ -269,6 +269,24 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
rw->kiocb.ki_ioprio = get_current_ioprio();
}
rw->kiocb.dio_complete = NULL;
+ if (ddir == ITER_SOURCE) {
+ u16 mtype = READ_ONCE(sqe->meta_type);
+
+ rw->kiocb.ki_write_hint = WRITE_LIFE_INVALID;
+ if (mtype) {
+ u64 lhint = READ_ONCE(sqe->lifetime_val);
+
+ if (READ_ONCE(sqe->__pad4[0]) ||
+ READ_ONCE(sqe->__pad5[0]))
+ return -EINVAL;
+
+ if (mtype != META_TYPE_LIFETIME_HINT ||
+ !rw_hint_valid(lhint))
+ return -EINVAL;
+
+ rw->kiocb.ki_write_hint = lhint;
+ }
+ }
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
@@ -1023,7 +1041,12 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret))
return ret;
req->cqe.res = iov_iter_count(&io->iter);
- rw->kiocb.ki_write_hint = file_write_hint(rw->kiocb.ki_filp);
+ /*
+ * Fall back to the per-file hint only when no per-io hint was set
+ * at prep time; a per-io hint always takes precedence.
+ */
+ if (rw->kiocb.ki_write_hint == WRITE_LIFE_INVALID)
+ rw->kiocb.ki_write_hint = file_write_hint(rw->kiocb.ki_filp);
if (force_nonblock) {
/* If the file doesn't support async, just async punt */