diff mbox series

timerfd: fix nonblocking reads

Message ID b4059ed0-5567-44e7-95f7-f7e4b227501c@kernel.dk (mailing list archive)
State New
Headers show
Series timerfd: fix nonblocking reads | expand

Commit Message

Jens Axboe April 1, 2024, 2:36 p.m. UTC
timerfd is utterly buggy wrt nonblocking reads - regardless of whether
or not there's data available, it returns -EAGAIN. This is incompatible
with how nonblocking reads should work. If there's data available, it
should be returned.

Convert it to use fops->read_iter() so it can handle both nonblocking
fds and IOCB_NOWAIT, mark it as FMODE_NOWAIT to signify that it's
compatible with nonblocking reads, and finally have timerfd_read_iter()
properly check for data availability in nonblocking mode.

Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Can't believe it's this broken... Patch has been tested with a test case
that was reported via io_uring, and I also ran the ltp timerfd test
cases and it passes all of those too.

Comments

Jens Axboe April 1, 2024, 3:21 p.m. UTC | #1
On 4/1/24 8:36 AM, Jens Axboe wrote:
> timerfd is utterly buggy wrt nonblocking reads - regardless of whether
> or not there's data available, it returns -EAGAIN. This is incompatible
> with how nonblocking reads should work. If there's data available, it
> should be returned.
> 
> Convert it to use fops->read_iter() so it can handle both nonblocking
> fds and IOCB_NOWAIT, mark it as FMODE_NOWAIT to signify that it's
> compatible with nonblocking reads, and finally have timerfd_read_iter()
> properly check for data availability in nonblocking mode.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> ---
> 
> Can't believe it's this broken... Patch has been tested with a test case
> that was reported via io_uring, and I also ran the ltp timerfd test
> cases and it passes all of those too.

OK pre-coffee email, it's not actually that broken. Which makes me feel
better, because I'm not used to finding stuff like this in core bits,
mostly just the odd drivers.

I'll send a revised version of this with a fixed up changelog, as
supporting IOCB_NOWAIT is still very useful. Won't change the actual
patch, as that's really all it does.
diff mbox series

Patch

diff --git a/fs/timerfd.c b/fs/timerfd.c
index e9c96a0c79f1..9297b82af13d 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -262,20 +262,24 @@  static __poll_t timerfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
+	struct file *file = iocb->ki_filp;
 	struct timerfd_ctx *ctx = file->private_data;
 	ssize_t res;
 	u64 ticks = 0;
 
-	if (count < sizeof(ticks))
+	if (iov_iter_count(to) < sizeof(ticks))
 		return -EINVAL;
+
 	spin_lock_irq(&ctx->wqh.lock);
-	if (file->f_flags & O_NONBLOCK)
-		res = -EAGAIN;
-	else
-		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
+	if (!ctx->ticks) {
+		if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
+			res = -EAGAIN;
+		else
+			res = wait_event_interruptible_locked_irq(ctx->wqh,
+								  ctx->ticks);
+	}
 
 	/*
 	 * If clock has changed, we do not care about the
@@ -313,7 +317,7 @@  static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
 	if (ticks)
-		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
+		res = copy_to_iter(&ticks, sizeof(ticks), to);
 	return res;
 }
 
@@ -384,7 +388,7 @@  static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 static const struct file_operations timerfd_fops = {
 	.release	= timerfd_release,
 	.poll		= timerfd_poll,
-	.read		= timerfd_read,
+	.read_iter	= timerfd_read_iter,
 	.llseek		= noop_llseek,
 	.show_fdinfo	= timerfd_show,
 	.unlocked_ioctl	= timerfd_ioctl,
@@ -407,6 +411,7 @@  SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 {
 	int ufd;
 	struct timerfd_ctx *ctx;
+	struct file *file;
 
 	/* Check the TFD_* constants for consistency.  */
 	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +448,22 @@  SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 
 	ctx->moffs = ktime_mono_to_real(0);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
-			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
-	if (ufd < 0)
+	ufd = get_unused_fd_flags(O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+	if (ufd < 0) {
 		kfree(ctx);
+		return ufd;
+	}
+
+	file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
+				    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+	if (IS_ERR(file)) {
+		put_unused_fd(ufd);
+		kfree(ctx);
+		return PTR_ERR(file);
+	}
 
+	file->f_mode |= FMODE_NOWAIT;
+	fd_install(ufd, file);
 	return ufd;
 }