diff mbox series

[2/3] vfs_getdents/struct dir_context: add flags field

Message ID 20230711114027.59945-3-hao.xu@linux.dev (mailing list archive)
State New
Headers show
Series io_uring getdents | expand

Commit Message

Hao Xu July 11, 2023, 11:40 a.m. UTC
From: Hao Xu <howeyxu@tencent.com>

The flags will allow passing DIR_CONTEXT_F_NOWAIT to iterate()
implementations that support it (as signaled through FMODE_NOWAIT
in file->f_mode).

Notes:
- considered using IOCB_NOWAIT but if we add more flags later it
would be confusing to keep track of which values are valid, use
dedicated flags
- might want to check ctx.flags & DIR_CONTEXT_F_NOWAIT is only set
when file->f_mode & FMODE_NOWAIT in iterate_dir() as e.g. WARN_ONCE?

Co-developed-by: Dominique Martinet <asmadeus@codewreck.org>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
Signed-off-by: Hao Xu <howeyxu@tencent.com>
---
 fs/internal.h      | 2 +-
 fs/readdir.c       | 6 ++++--
 include/linux/fs.h | 8 ++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

Comments

Christian Brauner July 12, 2023, 11:31 a.m. UTC | #1
On Tue, Jul 11, 2023 at 07:40:26PM +0800, Hao Xu wrote:
> From: Hao Xu <howeyxu@tencent.com>
> 
> The flags will allow passing DIR_CONTEXT_F_NOWAIT to iterate()
> implementations that support it (as signaled through FMODE_NWAIT
> in file->f_mode)
> 
> Notes:
> - considered using IOCB_NOWAIT but if we add more flags later it
> would be confusing to keep track of which values are valid, use
> dedicated flags
> - might want to check ctx.flags & DIR_CONTEXT_F_NOWAIT is only set
> when file->f_mode & FMODE_NOWAIT in iterate_dir() as e.g. WARN_ONCE?
> 
> Co-developed-by: Dominique Martinet <asmadeus@codewreck.org>
> Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
> Signed-off-by: Hao Xu <howeyxu@tencent.com>
> ---
>  fs/internal.h      | 2 +-
>  fs/readdir.c       | 6 ++++--
>  include/linux/fs.h | 8 ++++++++
>  3 files changed, 13 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index b1f66e52d61b..7508d485c655 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -311,4 +311,4 @@ void mnt_idmap_put(struct mnt_idmap *idmap);
>  struct linux_dirent64;
>  
>  int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> -		 unsigned int count);
> +		 unsigned int count, unsigned long flags);
> diff --git a/fs/readdir.c b/fs/readdir.c
> index 9592259b7e7f..b80caf4c9321 100644
> --- a/fs/readdir.c
> +++ b/fs/readdir.c
> @@ -358,12 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
>   * @file    : pointer to file struct of directory
>   * @dirent  : pointer to user directory structure
>   * @count   : size of buffer
> + * @flags   : additional dir_context flags

Why do you need that flag argument. The ->iterate{_shared}() i_op gets
passed the file so the filesystem can check
@file->f_mode & FMODE_NOWAIT, no?

>   */
>  int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
> -		 unsigned int count)
> +		 unsigned int count, unsigned long flags)
>  {
>  	struct getdents_callback64 buf = {
>  		.ctx.actor = filldir64,
> +		.ctx.flags = flags,
>  		.count = count,
>  		.current_dir = dirent
>  	};
> @@ -395,7 +397,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd,
>  	if (!f.file)
>  		return -EBADF;
>  
> -	error = vfs_getdents(f.file, dirent, count);
> +	error = vfs_getdents(f.file, dirent, count, 0);
>  
>  	fdput_pos(f);
>  	return error;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 6867512907d6..f3e315e8efdd 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1719,8 +1719,16 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
>  struct dir_context {
>  	filldir_t actor;
>  	loff_t pos;
> +	unsigned long flags;
>  };
>  
> +/*
> + * flags for dir_context flags
> + * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
> + *                       (requires file->f_mode & FMODE_NOWAIT)
> + */
> +#define DIR_CONTEXT_F_NOWAIT	(1 << 0)

Even if this should be needed, I don't think this needs to use a full
flags field.
Dominique Martinet July 12, 2023, 4:02 p.m. UTC | #2
(replying as that was my code)

Christian Brauner wrote on Wed, Jul 12, 2023 at 01:31:57PM +0200:
> On Tue, Jul 11, 2023 at 07:40:26PM +0800, Hao Xu wrote:
> > diff --git a/fs/readdir.c b/fs/readdir.c
> > index 9592259b7e7f..b80caf4c9321 100644
> > --- a/fs/readdir.c
> > +++ b/fs/readdir.c
> > @@ -358,12 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
> >   * @file    : pointer to file struct of directory
> >   * @dirent  : pointer to user directory structure
> >   * @count   : size of buffer
> > + * @flags   : additional dir_context flags
> 
> Why do you need that flag argument. The ->iterate{_shared}() i_op gets
> passed the file so the filesystem can check
> @file->f_mode & FMODE_NOWAIT, no?

As far as I understand it, the fd being capable of NOWAIT does not mean
that io_uring will call it in NOWAIT mode:
- if the first getdents call returned -EAGAIN it'll also fall back to
waiting in a separate thread (there's no "getdents poll" implementation,
so there's no other way of rescheduling a non-blocking call)
- it's also possible for the user to specify it wants IOSQE_ASYNC in the
sqe->flags (admittedly I'm not sure why anyone would do this, but that's
useful for benchmarks at least -- it skips the initial NOWAIT call
before falling back to the threaded waiting call)

Even outside of io_uring, a call to getdents64 should block, so even if
the filesystem supports non-blocking it should be explicitly requested
by the caller.


> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1719,8 +1719,16 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
> >  struct dir_context {
> >  	filldir_t actor;
> >  	loff_t pos;
> > +	unsigned long flags;
> >  };
> >  
> > +/*
> > + * flags for dir_context flags
> > + * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
> > + *                       (requires file->f_mode & FMODE_NOWAIT)
> > + */
> > +#define DIR_CONTEXT_F_NOWAIT	(1 << 0)
> 
> Even if this should be needed, I don't think this needs to use a full
> flags field.

I also got a request to somehow pass back "are there more entries to
read after this call" to the caller in my v1, and I had done this as a
second flag -- in general my understanding was that it's better to add
flags than a specific boolean for extensibility, but I have no opinion
here.
Hao Xu July 13, 2023, 4:12 a.m. UTC | #3
Hi Christian and Dominique,


On 7/13/23 00:02, Dominique Martinet wrote:
> (replying as that was my code)
>
> Christian Brauner wrote on Wed, Jul 12, 2023 at 01:31:57PM +0200:
>> On Tue, Jul 11, 2023 at 07:40:26PM +0800, Hao Xu wrote:
>>> diff --git a/fs/readdir.c b/fs/readdir.c
>>> index 9592259b7e7f..b80caf4c9321 100644
>>> --- a/fs/readdir.c
>>> +++ b/fs/readdir.c
>>> @@ -358,12 +358,14 @@ static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
>>>    * @file    : pointer to file struct of directory
>>>    * @dirent  : pointer to user directory structure
>>>    * @count   : size of buffer
>>> + * @flags   : additional dir_context flags
>> Why do you need that flag argument. The ->iterate{_shared}() i_op gets
>> passed the file so the filesystem can check
>> @file->f_mode & FMODE_NOWAIT, no?
> As far as I understand it, it's not because the fd is capable of NOWAIT
> that uring will call it in NOWAIT mode:
> - if the first getdents call returned -EAGAIN it'll also fall back to
> waiting in a separate thread (there's no "getdents poll" implementation,
> so there's no other way of rescheduling a non-blocking call)
> - it's also possible for the user to specify it wants IOSQE_ASYNC in the
> sqe->flags (admitedly I'm not sure why would anyone do this, but that's
> useful for benchmarks at least -- it skips the initial NOWAIT call
> before falling back to threaded waiting call)
>
> Even outsides of io_uring, a call to getdents64 should block, so even if
> the filesystem supports non-blocking it should be explicitely required
> by the caller.


Hi Christian,

My understanding of FMODE_NOWAIT is "this file supports nowait IO". Just
like Dominique said, io_uring issues a request in two rounds (let's
simplify it here, since no apoll or task work is involved): the first
round is a nowait/nonblocking try, and the second one is an offloaded
blocking try. So besides an "ability" flag (FMODE_NOWAIT), we still need
a "one-round" flag to point out that "we do need to do nowait IO this
time".


>
>>> --- a/include/linux/fs.h
>>> +++ b/include/linux/fs.h
>>> @@ -1719,8 +1719,16 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
>>>   struct dir_context {
>>>   	filldir_t actor;
>>>   	loff_t pos;
>>> +	unsigned long flags;
>>>   };
>>>   
>>> +/*
>>> + * flags for dir_context flags
>>> + * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
>>> + *                       (requires file->f_mode & FMODE_NOWAIT)
>>> + */
>>> +#define DIR_CONTEXT_F_NOWAIT	(1 << 0)
>> Even if this should be needed, I don't think this needs to use a full
>> flags field.
> I also got a request to somehow pass back "are there more entries to
> read after this call" to the caller in my v1, and I had done this as a
> second flag -- in general my understanding was that it's better to add
> flags than a specific boolean for extensibility but I have no opinon
> here.


I have no strong opinion here; I kept it as a flags variable to make it
more extensible in the future.


Thanks,

Hao
diff mbox series

Patch

diff --git a/fs/internal.h b/fs/internal.h
index b1f66e52d61b..7508d485c655 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -311,4 +311,4 @@  void mnt_idmap_put(struct mnt_idmap *idmap);
 struct linux_dirent64;
 
 int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
-		 unsigned int count);
+		 unsigned int count, unsigned long flags);
diff --git a/fs/readdir.c b/fs/readdir.c
index 9592259b7e7f..b80caf4c9321 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -358,12 +358,14 @@  static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
  * @file    : pointer to file struct of directory
  * @dirent  : pointer to user directory structure
  * @count   : size of buffer
+ * @flags   : additional dir_context flags
  */
 int vfs_getdents(struct file *file, struct linux_dirent64 __user *dirent,
-		 unsigned int count)
+		 unsigned int count, unsigned long flags)
 {
 	struct getdents_callback64 buf = {
 		.ctx.actor = filldir64,
+		.ctx.flags = flags,
 		.count = count,
 		.current_dir = dirent
 	};
@@ -395,7 +397,7 @@  SYSCALL_DEFINE3(getdents64, unsigned int, fd,
 	if (!f.file)
 		return -EBADF;
 
-	error = vfs_getdents(f.file, dirent, count);
+	error = vfs_getdents(f.file, dirent, count, 0);
 
 	fdput_pos(f);
 	return error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..f3e315e8efdd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1719,8 +1719,16 @@  typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
 struct dir_context {
 	filldir_t actor;
 	loff_t pos;
+	unsigned long flags;
 };
 
+/*
+ * flags for dir_context flags
+ * DIR_CONTEXT_F_NOWAIT: Request non-blocking iterate
+ *                       (requires file->f_mode & FMODE_NOWAIT)
+ */
+#define DIR_CONTEXT_F_NOWAIT	(1 << 0)
+
 /*
  * These flags let !MMU mmap() govern direct device mapping vs immediate
  * copying more easily for MAP_PRIVATE, especially for ROM filesystems.