diff mbox series

[068/104] virtiofsd: passthrough_ll: control readdirplus

Message ID 20191212163904.159893-69-dgilbert@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs daemon [all] | expand

Commit Message

Dr. David Alan Gilbert Dec. 12, 2019, 4:38 p.m. UTC
From: Miklos Szeredi <mszeredi@redhat.com>

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 tools/virtiofsd/passthrough_ll.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

Comments

Daniel P. Berrangé Jan. 7, 2020, 11:23 a.m. UTC | #1
On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: Miklos Szeredi <mszeredi@redhat.com>
>

What is readdirplus and what do we need a command line option to
control it ? What's the user benefit of changing the setting ?

> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> ---
>  tools/virtiofsd/passthrough_ll.c | 7 ++++++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index 0d70a367bd..c3e8bde5cf 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -118,6 +118,8 @@ struct lo_data {
>      double timeout;
>      int cache;
>      int timeout_set;
> +    int readdirplus_set;
> +    int readdirplus_clear;
>      struct lo_inode root; /* protected by lo->mutex */
>      struct lo_map ino_map; /* protected by lo->mutex */
>      struct lo_map dirp_map; /* protected by lo->mutex */
> @@ -141,6 +143,8 @@ static const struct fuse_opt lo_opts[] = {
>      { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
>      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
>      { "norace", offsetof(struct lo_data, norace), 1 },
> +    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
> +    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
>      FUSE_OPT_END
>  };
>  static bool use_syslog = false;
> @@ -479,7 +483,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
>          fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
>          conn->want |= FUSE_CAP_FLOCK_LOCKS;
>      }
> -    if (lo->cache == CACHE_NEVER) {
> +    if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
> +        lo->readdirplus_clear) {
>          fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
>          conn->want &= ~FUSE_CAP_READDIRPLUS;
>      }
> -- 
> 2.23.0
> 
> 

Regards,
Daniel
Dr. David Alan Gilbert Jan. 10, 2020, 3:04 p.m. UTC | #2
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: Miklos Szeredi <mszeredi@redhat.com>
> >
> 
> What is readdirplus and what do we need a command line option to
> control it ? What's the user benefit of changing the setting ?

cc'ing Miklos who understands this better than me.

My understanding is that readdirplus is a heuristic inherited from NFS
where when you iterate over the directory you also pick up stat() data
for each entry in the directory.  You then cache that stat data
somewhere.
The Plus-ness is that a lot of directory operations involve you stating
each entry (e.g. to figure out if you can access it etc) so rolling it
into one op avoids the separate stat.  The unplus-ness is that it's an
overhead and I think changes some of the caching behaviour.

Dave


> > Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> > ---
> >  tools/virtiofsd/passthrough_ll.c | 7 ++++++-
> >  1 file changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> > index 0d70a367bd..c3e8bde5cf 100644
> > --- a/tools/virtiofsd/passthrough_ll.c
> > +++ b/tools/virtiofsd/passthrough_ll.c
> > @@ -118,6 +118,8 @@ struct lo_data {
> >      double timeout;
> >      int cache;
> >      int timeout_set;
> > +    int readdirplus_set;
> > +    int readdirplus_clear;
> >      struct lo_inode root; /* protected by lo->mutex */
> >      struct lo_map ino_map; /* protected by lo->mutex */
> >      struct lo_map dirp_map; /* protected by lo->mutex */
> > @@ -141,6 +143,8 @@ static const struct fuse_opt lo_opts[] = {
> >      { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
> >      { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
> >      { "norace", offsetof(struct lo_data, norace), 1 },
> > +    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
> > +    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
> >      FUSE_OPT_END
> >  };
> >  static bool use_syslog = false;
> > @@ -479,7 +483,8 @@ static void lo_init(void *userdata, struct fuse_conn_info *conn)
> >          fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
> >          conn->want |= FUSE_CAP_FLOCK_LOCKS;
> >      }
> > -    if (lo->cache == CACHE_NEVER) {
> > +    if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
> > +        lo->readdirplus_clear) {
> >          fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
> >          conn->want &= ~FUSE_CAP_READDIRPLUS;
> >      }
> > -- 
> > 2.23.0
> > 
> > 
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Miklos Szeredi Jan. 10, 2020, 3:13 p.m. UTC | #3
On Fri, Jan 10, 2020 at 4:04 PM Dr. David Alan Gilbert
<dgilbert@redhat.com> wrote:
>
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > > From: Miklos Szeredi <mszeredi@redhat.com>
> > >
> >
> > What is readdirplus and what do we need a command line option to
> > control it ? What's the user benefit of changing the setting ?
>
> cc'ing Miklos who understands this better than me.
>
> My understanding is that readdirplus is a heuristic inherited from NFS
> where when you iterate over the directory you also pick up stat() data
> for each entry in the directory.  You then cache that stat data
> somewhere.
> The Plus-ness is that a lot of directory operations involve you stating
> each entry (e.g. to figure out if you can access it etc) so rolling it
> into one op avoids the separate stat.  The unplus-ness is that it's an
> overhead and I think changes some of the caching behaviour.

Yeah, so either may give better performance and it's hard to pick a
clear winner.  NFS also has an option to control this.

Thanks,
Miklos
Daniel P. Berrangé Jan. 10, 2020, 3:18 p.m. UTC | #4
On Fri, Jan 10, 2020 at 04:13:08PM +0100, Miklos Szeredi wrote:
> On Fri, Jan 10, 2020 at 4:04 PM Dr. David Alan Gilbert
> <dgilbert@redhat.com> wrote:
> >
> > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > > On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > > > From: Miklos Szeredi <mszeredi@redhat.com>
> > > >
> > >
> > > What is readdirplus and what do we need a command line option to
> > > control it ? What's the user benefit of changing the setting ?
> >
> > cc'ing Miklos who understands this better than me.
> >
> > My understanding is that readdirplus is a heuristic inherited from NFS
> > where when you iterate over the directory you also pick up stat() data
> > for each entry in the directory.  You then cache that stat data
> > somewhere.
> > The Plus-ness is that a lot of directory operations involve you stating
> > each entry (e.g. to figure out if you can access it etc) so rolling it
> > into one op avoids the separate stat.  The unplus-ness is that it's an
> > overhead and I think changes some of the caching behaviour.
> 
> Yeah, so either may give better performance and it's hard to pick a
> clear winner.  NFS also has an option to control this.

IIUC from the man page, the NFS option for controlling this is a client
side mount option. This makes sense as only the client really has knowledge
of whether its workload will benefit.

With this in mind, should the readdirplus control for virtio-fs also be a
guest mount option instead of a host virtiofsd CLI option ? The guest admin
seems best placed to know whether their workload will benefit or not.

Regards,
Daniel
Miklos Szeredi Jan. 10, 2020, 3:30 p.m. UTC | #5
On Fri, Jan 10, 2020 at 4:18 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Fri, Jan 10, 2020 at 04:13:08PM +0100, Miklos Szeredi wrote:
> > On Fri, Jan 10, 2020 at 4:04 PM Dr. David Alan Gilbert
> > <dgilbert@redhat.com> wrote:
> > >
> > > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > > > On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > > > > From: Miklos Szeredi <mszeredi@redhat.com>
> > > > >
> > > >
> > > > What is readdirplus and what do we need a command line option to
> > > > control it ? What's the user benefit of changing the setting ?
> > >
> > > cc'ing Miklos who understands this better than me.
> > >
> > > My understanding is that readdirplus is a heuristic inherited from NFS
> > > where when you iterate over the directory you also pick up stat() data
> > > for each entry in the directory.  You then cache that stat data
> > > somewhere.
> > > The Plus-ness is that a lot of directory operations involve you stating
> > > each entry (e.g. to figure out if you can access it etc) so rolling it
> > > into one op avoids the separate stat.  The unplus-ness is that it's an
> > > overhead and I think changes some of the caching behaviour.
> >
> > Yeah, so either may give better performance and it's hard to pick a
> > clear winner.  NFS also has an option to control this.
>
> IIUC from the man page, the NFS option for controlling this is a client
> side mount option. This makes sense as only the client really has knowledge
> of whether its workload will benefit.
>
> With this in mind, should the readdirplus control for virtio-fs also be a
> guest mount option instead of a host virtiofsd CLI option ? The guest admin
> seems best placed to know whether their workload will benefit or not.

Definitely.   In fact other options, e.g. ones that control caching,
should probably also be client side (cache=XXX, writeback,
timeout=XXX, etc).

This needs an extension of the INIT message, so options can be passed
to the server.   Added this to our TODO list.

Thanks,
Miklos
Vivek Goyal Jan. 10, 2020, 3:40 p.m. UTC | #6
On Fri, Jan 10, 2020 at 04:30:01PM +0100, Miklos Szeredi wrote:
> On Fri, Jan 10, 2020 at 4:18 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >
> > On Fri, Jan 10, 2020 at 04:13:08PM +0100, Miklos Szeredi wrote:
> > > On Fri, Jan 10, 2020 at 4:04 PM Dr. David Alan Gilbert
> > > <dgilbert@redhat.com> wrote:
> > > >
> > > > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > > > > On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > > > > > From: Miklos Szeredi <mszeredi@redhat.com>
> > > > > >
> > > > >
> > > > > What is readdirplus and what do we need a command line option to
> > > > > control it ? What's the user benefit of changing the setting ?
> > > >
> > > > cc'ing Miklos who understands this better than me.
> > > >
> > > > My understanding is that readdirplus is a heuristic inherited from NFS
> > > > where when you iterate over the directory you also pick up stat() data
> > > > for each entry in the directory.  You then cache that stat data
> > > > somewhere.
> > > > The Plus-ness is that a lot of directory operations involve you stating
> > > > each entry (e.g. to figure out if you can access it etc) so rolling it
> > > > into one op avoids the separate stat.  The unplus-ness is that it's an
> > > > overhead and I think changes some of the caching behaviour.
> > >
> > > Yeah, so either may give better performance and it's hard to pick a
> > > clear winner.  NFS also has an option to control this.
> >
> > IIUC from the man page, the NFS option for controlling this is a client
> > side mount option. This makes sense as only the client really has knowledge
> > of whether its workload will benefit.
> >
> > With this in mind, should the readdirplus control for virtio-fs also be a
> > guest mount option instead of a host virtiofsd CLI option ? The guest admin
> > seems best placed to know whether their workload will benefit or not.
> 
> Definitely.   In fact other options, e.g. ones that control caching,
> should probably also be client side (cache=XXX, writeback,
> timeout=XXX, etc).

I am not sure about cache options. So if we want to share a directory
between multiple guests with stronger coherency (cache=none), then admin
should decide that cache=always/auto is not supported on this export.

Also, how will one client know whether there are other clients same
directory with strong coherency requirements and it should use cache=none
instead of cache=always/auto.

Having said that, it also makes sense that client knows its workoad
and can decide if cache=auto works best for it and use that instead.

May be we need both client and server side options. Client will request
certain cache=xxx options and server can deny these if admin decides
not to enable that option for that particular mount.

For example, if admin decides that we can only support cache=none on
this particular dir due to other guest sharing it, then daemon should
be able to deny cache=auto/always requests from client.

Thanks
Vivek
Miklos Szeredi Jan. 10, 2020, 4 p.m. UTC | #7
On Fri, Jan 10, 2020 at 4:40 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Fri, Jan 10, 2020 at 04:30:01PM +0100, Miklos Szeredi wrote:
> > On Fri, Jan 10, 2020 at 4:18 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > >
> > > On Fri, Jan 10, 2020 at 04:13:08PM +0100, Miklos Szeredi wrote:
> > > > On Fri, Jan 10, 2020 at 4:04 PM Dr. David Alan Gilbert
> > > > <dgilbert@redhat.com> wrote:
> > > > >
> > > > > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > > > > > On Thu, Dec 12, 2019 at 04:38:28PM +0000, Dr. David Alan Gilbert (git) wrote:
> > > > > > > From: Miklos Szeredi <mszeredi@redhat.com>
> > > > > > >
> > > > > >
> > > > > > What is readdirplus and what do we need a command line option to
> > > > > > control it ? What's the user benefit of changing the setting ?
> > > > >
> > > > > cc'ing Miklos who understands this better than me.
> > > > >
> > > > > My understanding is that readdirplus is a heuristic inherited from NFS
> > > > > where when you iterate over the directory you also pick up stat() data
> > > > > for each entry in the directory.  You then cache that stat data
> > > > > somewhere.
> > > > > The Plus-ness is that a lot of directory operations involve you stating
> > > > > each entry (e.g. to figure out if you can access it etc) so rolling it
> > > > > into one op avoids the separate stat.  The unplus-ness is that it's an
> > > > > overhead and I think changes some of the caching behaviour.
> > > >
> > > > Yeah, so either may give better performance and it's hard to pick a
> > > > clear winner.  NFS also has an option to control this.
> > >
> > > IIUC from the man page, the NFS option for controlling this is a client
> > > side mount option. This makes sense as only the client really has knowledge
> > > of whether its workload will benefit.
> > >
> > > With this in mind, should the readdirplus control for virtio-fs also be a
> > > guest mount option instead of a host virtiofsd CLI option ? The guest admin
> > > seems best placed to know whether their workload will benefit or not.
> >
> > Definitely.   In fact other options, e.g. ones that control caching,
> > should probably also be client side (cache=XXX, writeback,
> > timeout=XXX, etc).
>
> I am not sure about cache options. So if we want to share a directory
> between multiple guests with stronger coherency (cache=none), then admin
> should decide that cache=always/auto is not supported on this export.
>
> Also, how will one client know whether there are other clients same
> directory with strong coherency requirements and it should use cache=none
> instead of cache=always/auto.
>
> Having said that, it also makes sense that client knows its workoad
> and can decide if cache=auto works best for it and use that instead.
>
> May be we need both client and server side options. Client will request
> certain cache=xxx options and server can deny these if admin decides
> not to enable that option for that particular mount.
>
> For example, if admin decides that we can only support cache=none on
> this particular dir due to other guest sharing it, then daemon should
> be able to deny cache=auto/always requests from client.

Makes sense.  The server dictates policy, the client just passes the
options onto the server.

Thanks,
Miklos
diff mbox series

Patch

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 0d70a367bd..c3e8bde5cf 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -118,6 +118,8 @@  struct lo_data {
     double timeout;
     int cache;
     int timeout_set;
+    int readdirplus_set;
+    int readdirplus_clear;
     struct lo_inode root; /* protected by lo->mutex */
     struct lo_map ino_map; /* protected by lo->mutex */
     struct lo_map dirp_map; /* protected by lo->mutex */
@@ -141,6 +143,8 @@  static const struct fuse_opt lo_opts[] = {
     { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
     { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
     { "norace", offsetof(struct lo_data, norace), 1 },
+    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
+    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
     FUSE_OPT_END
 };
 static bool use_syslog = false;
@@ -479,7 +483,8 @@  static void lo_init(void *userdata, struct fuse_conn_info *conn)
         fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
         conn->want |= FUSE_CAP_FLOCK_LOCKS;
     }
-    if (lo->cache == CACHE_NEVER) {
+    if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
+        lo->readdirplus_clear) {
         fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
         conn->want &= ~FUSE_CAP_READDIRPLUS;
     }