diff mbox series

[2/4] timerfd: convert to ->read_iter()

Message ID 20240409152438.77960-3-axboe@kernel.dk (mailing list archive)
State New
Headers show
Series Convert fs drivers to ->read_iter() | expand

Commit Message

Jens Axboe April 9, 2024, 3:22 p.m. UTC
Switch timerfd to using fops->read_iter(), so it can support not just
O_NONBLOCK but IOCB_NOWAIT as well. With the latter, users like io_uring
interact with timerfds a lot better, as they can be driven purely
by the poll trigger.

Manually get and install the required fd, so that FMODE_NOWAIT can be
set before the file is installed into the file table.

No functional changes intended in this patch, it's purely a straight
conversion to using the read iterator method.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/timerfd.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

Comments

Jens Axboe April 10, 2024, 10:27 p.m. UTC | #1
On 4/9/24 9:22 AM, Jens Axboe wrote:
> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
>  		ctx->ticks = 0;
>  	}
>  	spin_unlock_irq(&ctx->wqh.lock);
> -	if (ticks)
> -		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
> +	if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
> +		res = -EFAULT;
>  	return res;
>  }

Dumb thinko here, as that should be:

if (ticks) {                                                            
	res = copy_to_iter(&ticks, sizeof(ticks), to);                  
	if (!res)                                                       
		res = -EFAULT;                                          
}            

I've updated my branch, just a heads-up. Odd how it passed testing,
guess I got stack lucky...
Marek Szyprowski April 11, 2024, 11:40 a.m. UTC | #2
Hi,

On 11.04.2024 00:27, Jens Axboe wrote:
> On 4/9/24 9:22 AM, Jens Axboe wrote:
>> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
>>   		ctx->ticks = 0;
>>   	}
>>   	spin_unlock_irq(&ctx->wqh.lock);
>> -	if (ticks)
>> -		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
>> +	if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
>> +		res = -EFAULT;
>>   	return res;
>>   }
> Dumb thinko here, as that should be:
>
> if (ticks) {
> 	res = copy_to_iter(&ticks, sizeof(ticks), to);
> 	if (!res)
> 		res = -EFAULT;
> }
>
> I've updated my branch, just a heads-up. Odd how it passed testing,
> guess I got stack lucky...

The old version found its way into today's linux-next and bisecting the 
boot issues directed me here. There is nothing more to report, but I can 
confirm that the above change indeed fixes the problems observed on 
next-20240411.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

I hope that tomorrow's linux-next will have the correct version of this 
patch.

Best regards
Jens Axboe April 11, 2024, 4:32 p.m. UTC | #3
On 4/11/24 5:40 AM, Marek Szyprowski wrote:
> Hi,
> 
> On 11.04.2024 00:27, Jens Axboe wrote:
>> On 4/9/24 9:22 AM, Jens Axboe wrote:
>>> @@ -312,8 +313,8 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
>>>   		ctx->ticks = 0;
>>>   	}
>>>   	spin_unlock_irq(&ctx->wqh.lock);
>>> -	if (ticks)
>>> -		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
>>> +	if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
>>> +		res = -EFAULT;
>>>   	return res;
>>>   }
>> Dumb thinko here, as that should be:
>>
>> if (ticks) {
>> 	res = copy_to_iter(&ticks, sizeof(ticks), to);
>> 	if (!res)
>> 		res = -EFAULT;
>> }
>>
>> I've updated my branch, just a heads-up. Odd how it passed testing,
>> guess I got stack lucky...
> 
> The old version found its way into today's linux-next and bisecting the 
> boot issues directed me here. There is nothing more to report, but I can 
> confirm that the above change indeed fixes the problems observed on 
> next-20240411.

Yeah sorry about that :(

> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>

Thanks!

> I hope that tomorrow's linux-next will have the correct version of this 
> patch.

It should, the branches have been updated.
diff mbox series

Patch

diff --git a/fs/timerfd.c b/fs/timerfd.c
index e9c96a0c79f1..f0d82dcbffef 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -262,17 +262,18 @@  static __poll_t timerfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
-			    loff_t *ppos)
+static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
+	struct file *file = iocb->ki_filp;
 	struct timerfd_ctx *ctx = file->private_data;
 	ssize_t res;
 	u64 ticks = 0;
 
-	if (count < sizeof(ticks))
+	if (iov_iter_count(to) < sizeof(ticks))
 		return -EINVAL;
+
 	spin_lock_irq(&ctx->wqh.lock);
-	if (file->f_flags & O_NONBLOCK)
+	if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
 		res = -EAGAIN;
 	else
 		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
@@ -312,8 +313,8 @@  static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 		ctx->ticks = 0;
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
-	if (ticks)
-		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
+	if (ticks && !copy_to_iter_full(&ticks, sizeof(ticks), to))
+		res = -EFAULT;
 	return res;
 }
 
@@ -384,7 +385,7 @@  static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
 static const struct file_operations timerfd_fops = {
 	.release	= timerfd_release,
 	.poll		= timerfd_poll,
-	.read		= timerfd_read,
+	.read_iter	= timerfd_read_iter,
 	.llseek		= noop_llseek,
 	.show_fdinfo	= timerfd_show,
 	.unlocked_ioctl	= timerfd_ioctl,
@@ -407,6 +408,7 @@  SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 {
 	int ufd;
 	struct timerfd_ctx *ctx;
+	struct file *file;
 
 	/* Check the TFD_* constants for consistency.  */
 	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +445,22 @@  SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
 
 	ctx->moffs = ktime_mono_to_real(0);
 
-	ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
-			       O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
-	if (ufd < 0)
+	ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
+	if (ufd < 0) {
 		kfree(ctx);
+		return ufd;
+	}
+
+	file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
+				    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
+	if (IS_ERR(file)) {
+		put_unused_fd(ufd);
+		kfree(ctx);
+		return PTR_ERR(file);
+	}
 
+	file->f_mode |= FMODE_NOWAIT;
+	fd_install(ufd, file);
 	return ufd;
 }