Message ID | 20250122215528.1270478-3-joannelkoong@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fuse: add kernel-enforced request timeout option | expand |
Hi Joanne, On Wed, Jan 22 2025, Joanne Koong wrote: > Introduce two new sysctls, "default_request_timeout" and > "max_request_timeout". These control how long (in seconds) a server can > take to reply to a request. If the server does not reply by the timeout, > then the connection will be aborted. The upper bound on these sysctl > values is 65535. > > "default_request_timeout" sets the default timeout if no timeout is > specified by the fuse server on mount. 0 (default) indicates no default > timeout should be enforced. If the server did specify a timeout, then > default_request_timeout will be ignored. > > "max_request_timeout" sets the max amount of time the server may take to > reply to a request. 0 (default) indicates no maximum timeout. If > max_request_timeout is set and the fuse server attempts to set a > timeout greater than max_request_timeout, the system will use > max_request_timeout as the timeout. Similarly, if default_request_timeout > is greater than max_request_timeout, the system will use > max_request_timeout as the timeout. If the server does not request a > timeout and default_request_timeout is set to 0 but max_request_timeout > is set, then the timeout will be max_request_timeout. > > Please note that these timeouts are not 100% precise. The request may > take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max > timeout due to how it's internally implemented. > > $ sysctl -a | grep fuse.default_request_timeout > fs.fuse.default_request_timeout = 0 > > $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout > tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument > > $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout > 65535 > > $ sysctl -a | grep fuse.default_request_timeout > fs.fuse.default_request_timeout = 65535 > > $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout > 0 > > $ sysctl -a | grep fuse.default_request_timeout > fs.fuse.default_request_timeout = 0 > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > Reviewed-by: Bernd Schubert <bschubert@ddn.com> > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> > --- > Documentation/admin-guide/sysctl/fs.rst | 25 ++++++++++++++++++++++ > fs/fuse/fuse_i.h | 10 +++++++++ > fs/fuse/inode.c | 28 +++++++++++++++++++++++-- > fs/fuse/sysctl.c | 24 +++++++++++++++++++++ > 4 files changed, 85 insertions(+), 2 deletions(-) > > diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst > index f5ec6c9312e1..35aeb30bed8b 100644 > --- a/Documentation/admin-guide/sysctl/fs.rst > +++ b/Documentation/admin-guide/sysctl/fs.rst > @@ -347,3 +347,28 @@ filesystems: > ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for > setting/getting the maximum number of pages that can be used for servicing > requests in FUSE. > + > +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for > +setting/getting the default timeout (in seconds) for a fuse server to > +reply to a kernel-issued request in the event where the server did not > +specify a timeout at mount. If the server set a timeout, > +then default_request_timeout will be ignored. The default > +"default_request_timeout" is set to 0. 0 indicates no default timeout. > +The maximum value that can be set is 65535. > + > +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for > +setting/getting the maximum timeout (in seconds) for a fuse server to > +reply to a kernel-issued request. A value greater than 0 automatically opts > +the server into a timeout that will be set to at most "max_request_timeout", > +even if the server did not specify a timeout and default_request_timeout is > +set to 0. If max_request_timeout is greater than 0 and the server set a timeout > +greater than max_request_timeout or default_request_timeout is set to a value > +greater than max_request_timeout, the system will use max_request_timeout as the > +timeout. 0 indicates no max request timeout. The maximum value that can be set > +is 65535. > + > +For timeouts, if the server does not respond to the request by the time > +the set timeout elapses, then the connection to the fuse server will be aborted. > +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but > +the timeout may kick in after 70 seconds). The upper margin of error for the > +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index 1321cc4ed2ab..e5114831798f 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -49,6 +49,16 @@ extern const unsigned long fuse_timeout_timer_freq; > > /** Maximum of max_pages received in init_out */ > extern unsigned int fuse_max_pages_limit; > +/* > + * Default timeout (in seconds) for the server to reply to a request > + * before the connection is aborted, if no timeout was specified on mount. > + */ > +extern unsigned int fuse_default_req_timeout; > +/* > + * Max timeout (in seconds) for the server to reply to a request before > + * the connection is aborted. > + */ > +extern unsigned int fuse_max_req_timeout; > > /** List of active connections */ > extern struct list_head fuse_conn_list; > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index 79ebeb60015c..4e36d99fae52 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); > static int set_global_limit(const char *val, const struct kernel_param *kp); > > unsigned int fuse_max_pages_limit = 256; > +/* default is no timeout */ > +unsigned int fuse_default_req_timeout = 0; > +unsigned int fuse_max_req_timeout = 0; > > unsigned max_user_bgreq; > module_param_call(max_user_bgreq, set_global_limit, param_get_uint, > @@ -1268,6 +1271,24 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) > fuse_timeout_timer_freq); > } > > +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) > +{ > + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) > + return; > + > + if (!timeout) > + timeout = fuse_default_req_timeout; > + > + if (fuse_max_req_timeout) { > + if (timeout) > + timeout = min(fuse_max_req_timeout, timeout); Sorry for this late comment (this is v12 already!), but I wonder if it would be worth to use clamp() instead of min(). Limiting this value to the range [FUSE_TIMEOUT_TIMER_FREQ, fuxe_max_req_timeout] would prevent accidentally setting the timeout to a very low value. There are also some white-spaces/tabs issues with the other patch, which are worth fixing before merging. Other than that, feel free to add my Reviewed-by: Luis Henriques <luis@igalia.com> Cheers,
On Thu, 23 Jan 2025 at 10:21, Luis Henriques <luis@igalia.com> wrote: > > Hi Joanne, > > On Wed, Jan 22 2025, Joanne Koong wrote: > > > Introduce two new sysctls, "default_request_timeout" and > > "max_request_timeout". These control how long (in seconds) a server can > > take to reply to a request. If the server does not reply by the timeout, > > then the connection will be aborted. The upper bound on these sysctl > > values is 65535. > > > > "default_request_timeout" sets the default timeout if no timeout is > > specified by the fuse server on mount. 0 (default) indicates no default > > timeout should be enforced. If the server did specify a timeout, then > > default_request_timeout will be ignored. > > > > "max_request_timeout" sets the max amount of time the server may take to > > reply to a request. 0 (default) indicates no maximum timeout. If > > max_request_timeout is set and the fuse server attempts to set a > > timeout greater than max_request_timeout, the system will use > > max_request_timeout as the timeout. Similarly, if default_request_timeout > > is greater than max_request_timeout, the system will use > > max_request_timeout as the timeout. If the server does not request a > > timeout and default_request_timeout is set to 0 but max_request_timeout > > is set, then the timeout will be max_request_timeout. > > > > Please note that these timeouts are not 100% precise. The request may > > take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max > > timeout due to how it's internally implemented. > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 0 > > > > $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument > > > > $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > 65535 > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 65535 > > > > $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > 0 > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 0 > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > Reviewed-by: Bernd Schubert <bschubert@ddn.com> > > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > > Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> Thanks, applied and pushed with some cleanups including Luis's clamp idea. Miklos
On 1/23/25 15:28, Miklos Szeredi wrote: > On Thu, 23 Jan 2025 at 10:21, Luis Henriques <luis@igalia.com> wrote: >> >> Hi Joanne, >> >> On Wed, Jan 22 2025, Joanne Koong wrote: >> >>> Introduce two new sysctls, "default_request_timeout" and >>> "max_request_timeout". These control how long (in seconds) a server can >>> take to reply to a request. If the server does not reply by the timeout, >>> then the connection will be aborted. The upper bound on these sysctl >>> values is 65535. >>> >>> "default_request_timeout" sets the default timeout if no timeout is >>> specified by the fuse server on mount. 0 (default) indicates no default >>> timeout should be enforced. If the server did specify a timeout, then >>> default_request_timeout will be ignored. >>> >>> "max_request_timeout" sets the max amount of time the server may take to >>> reply to a request. 0 (default) indicates no maximum timeout. If >>> max_request_timeout is set and the fuse server attempts to set a >>> timeout greater than max_request_timeout, the system will use >>> max_request_timeout as the timeout. Similarly, if default_request_timeout >>> is greater than max_request_timeout, the system will use >>> max_request_timeout as the timeout. If the server does not request a >>> timeout and default_request_timeout is set to 0 but max_request_timeout >>> is set, then the timeout will be max_request_timeout. >>> >>> Please note that these timeouts are not 100% precise. The request may >>> take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max >>> timeout due to how it's internally implemented. >>> >>> $ sysctl -a | grep fuse.default_request_timeout >>> fs.fuse.default_request_timeout = 0 >>> >>> $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout >>> tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument >>> >>> $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout >>> 65535 >>> >>> $ sysctl -a | grep fuse.default_request_timeout >>> fs.fuse.default_request_timeout = 65535 >>> >>> $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout >>> 0 >>> >>> $ sysctl -a | grep fuse.default_request_timeout >>> fs.fuse.default_request_timeout = 0 >>> >>> Signed-off-by: Joanne Koong <joannelkoong@gmail.com> >>> Reviewed-by: Bernd Schubert <bschubert@ddn.com> >>> Reviewed-by: Josef Bacik <josef@toxicpanda.com> >>> Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> > > Thanks, applied and pushed with some cleanups including Luis's clamp idea. Hi Miklos, I don't think the timeouts do work with io-uring yet, I'm not sure yet if I have time to work on that today or tomorrow (on something else right now, I can try, but no promises). How shall we handle it, if I don't manage in time? Thanks, Bernd
On Thu, 23 Jan 2025 at 15:41, Bernd Schubert <bschubert@ddn.com> wrote: > I don't think the timeouts do work with io-uring yet, I'm not sure > yet if I have time to work on that today or tomorrow (on something > else right now, I can try, but no promises). > > How shall we handle it, if I don't manage in time? Okay. Let's just put the timeout patches on hold until this is resolved. Thanks, Miklos
On Thu, Jan 23, 2025 at 6:42 AM Bernd Schubert <bschubert@ddn.com> wrote: > > > > On 1/23/25 15:28, Miklos Szeredi wrote: > > On Thu, 23 Jan 2025 at 10:21, Luis Henriques <luis@igalia.com> wrote: > >> > >> Hi Joanne, > >> > >> On Wed, Jan 22 2025, Joanne Koong wrote: > >> > >>> Introduce two new sysctls, "default_request_timeout" and > >>> "max_request_timeout". These control how long (in seconds) a server can > >>> take to reply to a request. If the server does not reply by the timeout, > >>> then the connection will be aborted. The upper bound on these sysctl > >>> values is 65535. > >>> > >>> "default_request_timeout" sets the default timeout if no timeout is > >>> specified by the fuse server on mount. 0 (default) indicates no default > >>> timeout should be enforced. If the server did specify a timeout, then > >>> default_request_timeout will be ignored. > >>> > >>> "max_request_timeout" sets the max amount of time the server may take to > >>> reply to a request. 0 (default) indicates no maximum timeout. If > >>> max_request_timeout is set and the fuse server attempts to set a > >>> timeout greater than max_request_timeout, the system will use > >>> max_request_timeout as the timeout. Similarly, if default_request_timeout > >>> is greater than max_request_timeout, the system will use > >>> max_request_timeout as the timeout. If the server does not request a > >>> timeout and default_request_timeout is set to 0 but max_request_timeout > >>> is set, then the timeout will be max_request_timeout. > >>> > >>> Please note that these timeouts are not 100% precise. The request may > >>> take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max > >>> timeout due to how it's internally implemented. > >>> > >>> $ sysctl -a | grep fuse.default_request_timeout > >>> fs.fuse.default_request_timeout = 0 > >>> > >>> $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >>> tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument > >>> > >>> $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >>> 65535 > >>> > >>> $ sysctl -a | grep fuse.default_request_timeout > >>> fs.fuse.default_request_timeout = 65535 > >>> > >>> $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >>> 0 > >>> > >>> $ sysctl -a | grep fuse.default_request_timeout > >>> fs.fuse.default_request_timeout = 0 > >>> > >>> Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > >>> Reviewed-by: Bernd Schubert <bschubert@ddn.com> > >>> Reviewed-by: Josef Bacik <josef@toxicpanda.com> > >>> Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> > > > > Thanks, applied and pushed with some cleanups including Luis's clamp idea. > > Hi Miklos, > > I don't think the timeouts do work with io-uring yet, I'm not sure > yet if I have time to work on that today or tomorrow (on something > else right now, I can try, but no promises). Hi Bernd, What are your thoughts on what is missing on the io-uring side for timeouts? If a request times out, it will abort the connection and AFAICT, the abort logic should already be fine for io-uring, as users can currently abort the connection through the sysfs interface and there's no internal difference in aborting through sysfs vs timeouts. Thanks, Joanne > > How shall we handle it, if I don't manage in time? > > > Thanks, > Bernd >
On Thu, Jan 23, 2025 at 1:21 AM Luis Henriques <luis@igalia.com> wrote: > > Hi Joanne, > > On Wed, Jan 22 2025, Joanne Koong wrote: > > > Introduce two new sysctls, "default_request_timeout" and > > "max_request_timeout". These control how long (in seconds) a server can > > take to reply to a request. If the server does not reply by the timeout, > > then the connection will be aborted. The upper bound on these sysctl > > values is 65535. > > > > "default_request_timeout" sets the default timeout if no timeout is > > specified by the fuse server on mount. 0 (default) indicates no default > > timeout should be enforced. If the server did specify a timeout, then > > default_request_timeout will be ignored. > > > > "max_request_timeout" sets the max amount of time the server may take to > > reply to a request. 0 (default) indicates no maximum timeout. If > > max_request_timeout is set and the fuse server attempts to set a > > timeout greater than max_request_timeout, the system will use > > max_request_timeout as the timeout. Similarly, if default_request_timeout > > is greater than max_request_timeout, the system will use > > max_request_timeout as the timeout. If the server does not request a > > timeout and default_request_timeout is set to 0 but max_request_timeout > > is set, then the timeout will be max_request_timeout. > > > > Please note that these timeouts are not 100% precise. The request may > > take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max > > timeout due to how it's internally implemented. > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 0 > > > > $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument > > > > $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > 65535 > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 65535 > > > > $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout > > 0 > > > > $ sysctl -a | grep fuse.default_request_timeout > > fs.fuse.default_request_timeout = 0 > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > Reviewed-by: Bernd Schubert <bschubert@ddn.com> > > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > > Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> > > --- > > Documentation/admin-guide/sysctl/fs.rst | 25 ++++++++++++++++++++++ > > fs/fuse/fuse_i.h | 10 +++++++++ > > fs/fuse/inode.c | 28 +++++++++++++++++++++++-- > > fs/fuse/sysctl.c | 24 +++++++++++++++++++++ > > 4 files changed, 85 insertions(+), 2 deletions(-) > > > > diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst > > index f5ec6c9312e1..35aeb30bed8b 100644 > > --- a/Documentation/admin-guide/sysctl/fs.rst > > +++ b/Documentation/admin-guide/sysctl/fs.rst > > @@ -347,3 +347,28 @@ filesystems: > > ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for > > setting/getting the maximum number of pages that can be used for servicing > > requests in FUSE. > > + > > +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for > > +setting/getting the default timeout (in seconds) for a fuse server to > > +reply to a kernel-issued request in the event where the server did not > > +specify a timeout at mount. If the server set a timeout, > > +then default_request_timeout will be ignored. The default > > +"default_request_timeout" is set to 0. 0 indicates no default timeout. > > +The maximum value that can be set is 65535. > > + > > +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for > > +setting/getting the maximum timeout (in seconds) for a fuse server to > > +reply to a kernel-issued request. A value greater than 0 automatically opts > > +the server into a timeout that will be set to at most "max_request_timeout", > > +even if the server did not specify a timeout and default_request_timeout is > > +set to 0. If max_request_timeout is greater than 0 and the server set a timeout > > +greater than max_request_timeout or default_request_timeout is set to a value > > +greater than max_request_timeout, the system will use max_request_timeout as the > > +timeout. 0 indicates no max request timeout. The maximum value that can be set > > +is 65535. > > + > > +For timeouts, if the server does not respond to the request by the time > > +the set timeout elapses, then the connection to the fuse server will be aborted. > > +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but > > +the timeout may kick in after 70 seconds). The upper margin of error for the > > +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. > > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > > index 1321cc4ed2ab..e5114831798f 100644 > > --- a/fs/fuse/fuse_i.h > > +++ b/fs/fuse/fuse_i.h > > @@ -49,6 +49,16 @@ extern const unsigned long fuse_timeout_timer_freq; > > > > /** Maximum of max_pages received in init_out */ > > extern unsigned int fuse_max_pages_limit; > > +/* > > + * Default timeout (in seconds) for the server to reply to a request > > + * before the connection is aborted, if no timeout was specified on mount. > > + */ > > +extern unsigned int fuse_default_req_timeout; > > +/* > > + * Max timeout (in seconds) for the server to reply to a request before > > + * the connection is aborted. > > + */ > > +extern unsigned int fuse_max_req_timeout; > > > > /** List of active connections */ > > extern struct list_head fuse_conn_list; > > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > > index 79ebeb60015c..4e36d99fae52 100644 > > --- a/fs/fuse/inode.c > > +++ b/fs/fuse/inode.c > > @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); > > static int set_global_limit(const char *val, const struct kernel_param *kp); > > > > unsigned int fuse_max_pages_limit = 256; > > +/* default is no timeout */ > > +unsigned int fuse_default_req_timeout = 0; > > +unsigned int fuse_max_req_timeout = 0; > > > > unsigned max_user_bgreq; > > module_param_call(max_user_bgreq, set_global_limit, param_get_uint, > > @@ -1268,6 +1271,24 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) > > fuse_timeout_timer_freq); > > } > > > > +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) > > +{ > > + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) > > + return; > > + > > + if (!timeout) > > + timeout = fuse_default_req_timeout; > > + > > + if (fuse_max_req_timeout) { > > + if (timeout) > > + timeout = min(fuse_max_req_timeout, timeout); > > Sorry for this late comment (this is v12 already!), but I wonder if it > would be worth to use clamp() instead of min(). Limiting this value to > the range [FUSE_TIMEOUT_TIMER_FREQ, fuxe_max_req_timeout] would prevent > accidentally setting the timeout to a very low value. I don't think we need to clamp to FUSE_TIMEOUT_TIMER_FREQ. If the user sets a timeout value lower than FUSE_TIMEOUT_TIMER_FREQ (15 secs), imo that should be supported. For example, if the user sets the timeout to 10 seconds, then the connection should be aborted if the request takes 13 seconds. I don't see why we need to have the min bound be FUSE_TIMEOUT_TIMER_FREQ. imo, the user-specified timeout value and FUSE_TIMEOUT_TIMER_FREQ value are orthogonal. But I also don't feel strongly about this and am fine with whichever way we go. Thanks, Joanne > > There are also some white-spaces/tabs issues with the other patch, which > are worth fixing before merging. Other than that, feel free to add my > > Reviewed-by: Luis Henriques <luis@igalia.com> > > Cheers, > -- > Luís > > > + else > > + timeout = fuse_max_req_timeout; > > + } > > + > > + set_request_timeout(fc, timeout); > > +} > > + > > struct fuse_init_args { > > struct fuse_args args; > > struct fuse_init_in in; > > @@ -1286,6 +1307,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, > > ok = false; > > else { > > unsigned long ra_pages; > > + unsigned int timeout = 0; > > > > process_init_limits(fc, arg); > > > > @@ -1404,14 +1426,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, > > if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) > > fc->io_uring = 1; > > > > - if ((flags & FUSE_REQUEST_TIMEOUT) && arg->request_timeout) > > - set_request_timeout(fc, arg->request_timeout); > > + if (flags & FUSE_REQUEST_TIMEOUT) > > + timeout = arg->request_timeout; > > } else { > > ra_pages = fc->max_read / PAGE_SIZE; > > fc->no_lock = 1; > > fc->no_flock = 1; > > } > > > > + init_server_timeout(fc, timeout); > > + > > fm->sb->s_bdi->ra_pages = > > min(fm->sb->s_bdi->ra_pages, ra_pages); > > fc->minor = arg->minor; > > diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c > > index b272bb333005..3d542ef9d889 100644 > > --- a/fs/fuse/sysctl.c > > +++ b/fs/fuse/sysctl.c > > @@ -13,6 +13,12 @@ static struct ctl_table_header *fuse_table_header; > > /* Bound by fuse_init_out max_pages, which is a u16 */ > > static unsigned int sysctl_fuse_max_pages_limit = 65535; > > > > +/* > > + * fuse_init_out request timeouts are u16. > > + * This goes up to ~18 hours, which is plenty for a timeout. > > + */ > > +static unsigned int sysctl_fuse_req_timeout_limit = 65535; > > + > > static struct ctl_table fuse_sysctl_table[] = { > > { > > .procname = "max_pages_limit", > > @@ -23,6 +29,24 @@ static struct ctl_table fuse_sysctl_table[] = { > > .extra1 = SYSCTL_ONE, > > .extra2 = &sysctl_fuse_max_pages_limit, > > }, > > + { > > + .procname = "default_request_timeout", > > + .data = &fuse_default_req_timeout, > > + .maxlen = sizeof(fuse_default_req_timeout), > > + .mode = 0644, > > + .proc_handler = proc_douintvec_minmax, > > + .extra1 = SYSCTL_ZERO, > > + .extra2 = &sysctl_fuse_req_timeout_limit, > > + }, > > + { > > + .procname = "max_request_timeout", > > + .data = &fuse_max_req_timeout, > > + .maxlen = sizeof(fuse_max_req_timeout), > > + .mode = 0644, > > + .proc_handler = proc_douintvec_minmax, > > + .extra1 = SYSCTL_ZERO, > > + .extra2 = &sysctl_fuse_req_timeout_limit, > > + }, > > }; > > > > int fuse_sysctl_register(void) > > -- > > 2.43.5 > > >
Hi Joanne, >>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. >> >> Hi Miklos, >> >> I don't think the timeouts do work with io-uring yet, I'm not sure >> yet if I have time to work on that today or tomorrow (on something >> else right now, I can try, but no promises). > > Hi Bernd, > > What are your thoughts on what is missing on the io-uring side for > timeouts? If a request times out, it will abort the connection and > AFAICT, the abort logic should already be fine for io-uring, as users > can currently abort the connection through the sysfs interface and > there's no internal difference in aborting through sysfs vs timeouts. > in fuse_check_timeout() it iterates over each fud and then fpq. In dev_uring.c fpq is is per queue but unrelated to fud. In current fuse-io-uring fud is not cloned anymore - using fud won't work. And Requests are also not queued at all on the other list fuse_check_timeout() is currently checking. Also, with a ring per core, maybe better to use a per queue check that is core bound? I.e. zero locking overhead? And I think we can also avoid iterating over hash lists (queue->fpq), but can use the 'ent_in_userspace' list. We need to iterate over these other entry queues anyway: ent_w_req_queue fuse_req_bg_queue ent_commit_queue And we also need to iterate over fuse_req_queue fuse_req_bg_queue Thanks, Bernd
On Thu, Jan 23, 2025 at 9:19 AM Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > Hi Joanne, > > >>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. > >> > >> Hi Miklos, > >> > >> I don't think the timeouts do work with io-uring yet, I'm not sure > >> yet if I have time to work on that today or tomorrow (on something > >> else right now, I can try, but no promises). > > > > Hi Bernd, > > > > What are your thoughts on what is missing on the io-uring side for > > timeouts? If a request times out, it will abort the connection and > > AFAICT, the abort logic should already be fine for io-uring, as users > > can currently abort the connection through the sysfs interface and > > there's no internal difference in aborting through sysfs vs timeouts. > > > > in fuse_check_timeout() it iterates over each fud and then fpq. > In dev_uring.c fpq is is per queue but unrelated to fud. In current > fuse-io-uring fud is not cloned anymore - using fud won't work. > And Requests are also not queued at all on the other list > fuse_check_timeout() is currently checking. In the io-uring case, there still can be fuds and their associated fpqs given that /dev/fuse can be used still, no? So wouldn't the io-uring case still need this logic in fuse_check_timeout() for checking requests going through /dev/fuse? > > Also, with a ring per core, maybe better to use > a per queue check that is core bound? I.e. zero locking overhead? How do you envision a queue check that bypasses grabbing the queue->lock? The timeout handler could be triggered on any core, so I'm not seeing how it could be core bound. > And I think we can also avoid iterating over hash lists (queue->fpq), > but can use the 'ent_in_userspace' list. > > We need to iterate over these other entry queues anyway: > > ent_w_req_queue > fuse_req_bg_queue > ent_commit_queue > Why do we need to iterate through the ent lists (ent_w_req_queue and ent_commit_queue)? AFAICT, in io-uring a request is either on the fuse_req_queue/fuse_req_bg_queue or on the fpq->processing list. Even when an entry has been queued to ent_w_req_queue or ent_commit_queue, the request itself is still queued on fuse_req_queue/fuse_req_bg_queue/fpq->processing. I'm not sure I understand why we still need to look at the ent lists? > > And we also need to iterate over > > fuse_req_queue > fuse_req_bg_queue Why do we need to iterate through fuse_req_queue and fuse_req_bg_queue? fuse_uring_request_expired() checks the head of fuse_req_queue and fuse_req_bg_queue and given that requests are added to fuse_req_queue/fuse_req_bg_queue sequentially (eg added to the tail of these lists), why isn't this enough? If it's helpful, I can resubmit this patch series so that the io-uring changes are isolated to its own patch (eg have patch 1 and 2 from the original series and then have patch 3 be the io-uring changes). Thanks, Joanne > > > > Thanks, > Bernd > >
On 1/23/25 18:48, Joanne Koong wrote: > On Thu, Jan 23, 2025 at 9:19 AM Bernd Schubert > <bernd.schubert@fastmail.fm> wrote: >> >> Hi Joanne, >> >>>>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. >>>> >>>> Hi Miklos, >>>> >>>> I don't think the timeouts do work with io-uring yet, I'm not sure >>>> yet if I have time to work on that today or tomorrow (on something >>>> else right now, I can try, but no promises). >>> >>> Hi Bernd, >>> >>> What are your thoughts on what is missing on the io-uring side for >>> timeouts? If a request times out, it will abort the connection and >>> AFAICT, the abort logic should already be fine for io-uring, as users >>> can currently abort the connection through the sysfs interface and >>> there's no internal difference in aborting through sysfs vs timeouts. >>> >> >> in fuse_check_timeout() it iterates over each fud and then fpq. >> In dev_uring.c fpq is is per queue but unrelated to fud. In current >> fuse-io-uring fud is not cloned anymore - using fud won't work. >> And Requests are also not queued at all on the other list >> fuse_check_timeout() is currently checking. > > In the io-uring case, there still can be fuds and their associated > fpqs given that /dev/fuse can be used still, no? So wouldn't the > io-uring case still need this logic in fuse_check_timeout() for > checking requests going through /dev/fuse? Yes, these need to be additionally checked. > >> >> Also, with a ring per core, maybe better to use >> a per queue check that is core bound? I.e. zero locking overhead? > > How do you envision a queue check that bypasses grabbing the > queue->lock? The timeout handler could be triggered on any core, so > I'm not seeing how it could be core bound. I don't want to bypass it, but maybe each queue could have its own workq and timeout checker? And then use queue_delayed_work_on()? > >> And I think we can also avoid iterating over hash lists (queue->fpq), >> but can use the 'ent_in_userspace' list. >> >> We need to iterate over these other entry queues anyway: >> >> ent_w_req_queue >> fuse_req_bg_queue >> ent_commit_queue >> > > Why do we need to iterate through the ent lists (ent_w_req_queue and > ent_commit_queue)? AFAICT, in io-uring a request is either on the > fuse_req_queue/fuse_req_bg_queue or on the fpq->processing list. Even > when an entry has been queued to ent_w_req_queue or ent_commit_queue, > the request itself is still queued on > fuse_req_queue/fuse_req_bg_queue/fpq->processing. I'm not sure I > understand why we still need to look at the ent lists? Yeah you are right, we could avoid ent_w_req_queue and ent_commit_queue if we use fpq->processing, but processing consists of 256 lists - overhead is much smaller by using the entry lists? > >> >> And we also need to iterate over >> >> fuse_req_queue >> fuse_req_bg_queue > > Why do we need to iterate through fuse_req_queue and > fuse_req_bg_queue? fuse_uring_request_expired() checks the head of > fuse_req_queue and fuse_req_bg_queue and given that requests are added > to fuse_req_queue/fuse_req_bg_queue sequentially (eg added to the tail > of these lists), why isn't this enough? I admit I'm a bit lost with that question. Aren't you pointing out the same lists as I do? > > > If it's helpful, I can resubmit this patch series so that the io-uring > changes are isolated to its own patch (eg have patch 1 and 2 from the > original series and then have patch 3 be the io-uring changes). Sounds good to me. Thanks, Bernd
On Thu, Jan 23, 2025 at 10:06 AM Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > > > On 1/23/25 18:48, Joanne Koong wrote: > > On Thu, Jan 23, 2025 at 9:19 AM Bernd Schubert > > <bernd.schubert@fastmail.fm> wrote: > >> > >> Hi Joanne, > >> > >>>>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. > >>>> > >>>> Hi Miklos, > >>>> > >>>> I don't think the timeouts do work with io-uring yet, I'm not sure > >>>> yet if I have time to work on that today or tomorrow (on something > >>>> else right now, I can try, but no promises). > >>> > >>> Hi Bernd, > >>> > >>> What are your thoughts on what is missing on the io-uring side for > >>> timeouts? If a request times out, it will abort the connection and > >>> AFAICT, the abort logic should already be fine for io-uring, as users > >>> can currently abort the connection through the sysfs interface and > >>> there's no internal difference in aborting through sysfs vs timeouts. > >>> > >> > >> in fuse_check_timeout() it iterates over each fud and then fpq. > >> In dev_uring.c fpq is is per queue but unrelated to fud. In current > >> fuse-io-uring fud is not cloned anymore - using fud won't work. > >> And Requests are also not queued at all on the other list > >> fuse_check_timeout() is currently checking. > > > > In the io-uring case, there still can be fuds and their associated > > fpqs given that /dev/fuse can be used still, no? So wouldn't the > > io-uring case still need this logic in fuse_check_timeout() for > > checking requests going through /dev/fuse? > > Yes, these need to be additionally checked. > > > > >> > >> Also, with a ring per core, maybe better to use > >> a per queue check that is core bound? I.e. zero locking overhead? > > > > How do you envision a queue check that bypasses grabbing the > > queue->lock? The timeout handler could be triggered on any core, so > > I'm not seeing how it could be core bound. > > I don't want to bypass it, but maybe each queue could have its own > workq and timeout checker? And then use queue_delayed_work_on()? > > > > > >> And I think we can also avoid iterating over hash lists (queue->fpq), > >> but can use the 'ent_in_userspace' list. > >> > >> We need to iterate over these other entry queues anyway: > >> > >> ent_w_req_queue > >> fuse_req_bg_queue > >> ent_commit_queue > >> > > > > Why do we need to iterate through the ent lists (ent_w_req_queue and > > ent_commit_queue)? AFAICT, in io-uring a request is either on the > > fuse_req_queue/fuse_req_bg_queue or on the fpq->processing list. Even > > when an entry has been queued to ent_w_req_queue or ent_commit_queue, > > the request itself is still queued on > > fuse_req_queue/fuse_req_bg_queue/fpq->processing. I'm not sure I > > understand why we still need to look at the ent lists? > > Yeah you are right, we could avoid ent_w_req_queue and ent_commit_queue > if we use fpq->processing, but processing consists of 256 lists - > overhead is much smaller by using the entry lists? > > > > > >> > >> And we also need to iterate over > >> > >> fuse_req_queue > >> fuse_req_bg_queue > > > > Why do we need to iterate through fuse_req_queue and > > fuse_req_bg_queue? fuse_uring_request_expired() checks the head of > > fuse_req_queue and fuse_req_bg_queue and given that requests are added > > to fuse_req_queue/fuse_req_bg_queue sequentially (eg added to the tail > > of these lists), why isn't this enough? > > I admit I'm a bit lost with that question. Aren't you pointing out > the same lists as I do? > Oh, I thought your comment was saying that we need to "iterate" over it (eg go through every request on the lists)? It currently already checks the fuse_req_queue and fuse_req_bg_queue lists (in fuse_uring_request_expired() which gets invoked in the fuse_check_timeout() timeout handler). Maybe the fuse_uring_request_expired() addition got missed - I tried to call this out in the cover letter changelog, as I had to rebase this patchset on top of the io-uring patches, so I added this function in to make it work for io-uring. I believe this suffices for now for the io uring case (with future optimizations that can be added)? Thanks, Joanne > > > > > > If it's helpful, I can resubmit this patch series so that the io-uring > > changes are isolated to its own patch (eg have patch 1 and 2 from the > > original series and then have patch 3 be the io-uring changes). > > Sounds good to me. > > > Thanks, > Bernd
On 1/23/25 19:32, Joanne Koong wrote: > On Thu, Jan 23, 2025 at 10:06 AM Bernd Schubert > <bernd.schubert@fastmail.fm> wrote: >> >> >> >> On 1/23/25 18:48, Joanne Koong wrote: >>> On Thu, Jan 23, 2025 at 9:19 AM Bernd Schubert >>> <bernd.schubert@fastmail.fm> wrote: >>>> >>>> Hi Joanne, >>>> >>>>>>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. >>>>>> >>>>>> Hi Miklos, >>>>>> >>>>>> I don't think the timeouts do work with io-uring yet, I'm not sure >>>>>> yet if I have time to work on that today or tomorrow (on something >>>>>> else right now, I can try, but no promises). >>>>> >>>>> Hi Bernd, >>>>> >>>>> What are your thoughts on what is missing on the io-uring side for >>>>> timeouts? If a request times out, it will abort the connection and >>>>> AFAICT, the abort logic should already be fine for io-uring, as users >>>>> can currently abort the connection through the sysfs interface and >>>>> there's no internal difference in aborting through sysfs vs timeouts. >>>>> >>>> >>>> in fuse_check_timeout() it iterates over each fud and then fpq. >>>> In dev_uring.c fpq is is per queue but unrelated to fud. In current >>>> fuse-io-uring fud is not cloned anymore - using fud won't work. >>>> And Requests are also not queued at all on the other list >>>> fuse_check_timeout() is currently checking. >>> >>> In the io-uring case, there still can be fuds and their associated >>> fpqs given that /dev/fuse can be used still, no? So wouldn't the >>> io-uring case still need this logic in fuse_check_timeout() for >>> checking requests going through /dev/fuse? >> >> Yes, these need to be additionally checked. >> >>> >>>> >>>> Also, with a ring per core, maybe better to use >>>> a per queue check that is core bound? I.e. zero locking overhead? >>> >>> How do you envision a queue check that bypasses grabbing the >>> queue->lock? The timeout handler could be triggered on any core, so >>> I'm not seeing how it could be core bound. >> >> I don't want to bypass it, but maybe each queue could have its own >> workq and timeout checker? And then use queue_delayed_work_on()? >> >> >>> >>>> And I think we can also avoid iterating over hash lists (queue->fpq), >>>> but can use the 'ent_in_userspace' list. >>>> >>>> We need to iterate over these other entry queues anyway: >>>> >>>> ent_w_req_queue >>>> fuse_req_bg_queue >>>> ent_commit_queue >>>> >>> >>> Why do we need to iterate through the ent lists (ent_w_req_queue and >>> ent_commit_queue)? AFAICT, in io-uring a request is either on the >>> fuse_req_queue/fuse_req_bg_queue or on the fpq->processing list. Even >>> when an entry has been queued to ent_w_req_queue or ent_commit_queue, >>> the request itself is still queued on >>> fuse_req_queue/fuse_req_bg_queue/fpq->processing. I'm not sure I >>> understand why we still need to look at the ent lists? >> >> Yeah you are right, we could avoid ent_w_req_queue and ent_commit_queue >> if we use fpq->processing, but processing consists of 256 lists - >> overhead is much smaller by using the entry lists? >> >> >>> >>>> >>>> And we also need to iterate over >>>> >>>> fuse_req_queue >>>> fuse_req_bg_queue >>> >>> Why do we need to iterate through fuse_req_queue and >>> fuse_req_bg_queue? fuse_uring_request_expired() checks the head of >>> fuse_req_queue and fuse_req_bg_queue and given that requests are added >>> to fuse_req_queue/fuse_req_bg_queue sequentially (eg added to the tail >>> of these lists), why isn't this enough? >> >> I admit I'm a bit lost with that question. Aren't you pointing out >> the same lists as I do? >> > > Oh, I thought your comment was saying that we need to "iterate" over > it (eg go through every request on the lists)? It currently already > checks the fuse_req_queue and fuse_req_bg_queue lists (in > fuse_uring_request_expired() which gets invoked in the > fuse_check_timeout() timeout handler). > > Maybe the fuse_uring_request_expired() addition got missed - I tried > to call this out in the cover letter changelog, as I had to rebase > this patchset on top of the io-uring patches, so I added this function > in to make it work for io-uring. I believe this suffices for now for > the io uring case (with future optimizations that can be added)? Ah sorry, that is me, I had missed you had already rebased it to io-uring. So we are good to land this version. Just would be good if we could optimize this soon - our test systems have 96 cores - 24576 list heads to check... I won't manage to work on it today and probably also not tomorrow, but by Monday I should have an optimized version ready. Thanks, Bernd
On 1/23/25 15:54, Miklos Szeredi wrote: > On Thu, 23 Jan 2025 at 15:41, Bernd Schubert <bschubert@ddn.com> wrote: > >> I don't think the timeouts do work with io-uring yet, I'm not sure >> yet if I have time to work on that today or tomorrow (on something >> else right now, I can try, but no promises). >> >> How shall we handle it, if I don't manage in time? > > Okay. Let's just put the timeout patches on hold until this is resolved. Sorry, my fault, I had missed that Joanne had already rebased to linux-next and already handles uring queues. Thanks, Bernd
Hi Joanne, On Thu, Jan 23 2025, Joanne Koong wrote: > On Thu, Jan 23, 2025 at 1:21 AM Luis Henriques <luis@igalia.com> wrote: >> >> Hi Joanne, >> >> On Wed, Jan 22 2025, Joanne Koong wrote: >> >> > Introduce two new sysctls, "default_request_timeout" and >> > "max_request_timeout". These control how long (in seconds) a server can >> > take to reply to a request. If the server does not reply by the timeout, >> > then the connection will be aborted. The upper bound on these sysctl >> > values is 65535. >> > >> > "default_request_timeout" sets the default timeout if no timeout is >> > specified by the fuse server on mount. 0 (default) indicates no default >> > timeout should be enforced. If the server did specify a timeout, then >> > default_request_timeout will be ignored. >> > >> > "max_request_timeout" sets the max amount of time the server may take to >> > reply to a request. 0 (default) indicates no maximum timeout. If >> > max_request_timeout is set and the fuse server attempts to set a >> > timeout greater than max_request_timeout, the system will use >> > max_request_timeout as the timeout. Similarly, if default_request_timeout >> > is greater than max_request_timeout, the system will use >> > max_request_timeout as the timeout. If the server does not request a >> > timeout and default_request_timeout is set to 0 but max_request_timeout >> > is set, then the timeout will be max_request_timeout. >> > >> > Please note that these timeouts are not 100% precise. The request may >> > take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max >> > timeout due to how it's internally implemented. >> > >> > $ sysctl -a | grep fuse.default_request_timeout >> > fs.fuse.default_request_timeout = 0 >> > >> > $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout >> > tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument >> > >> > $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout >> > 65535 >> > >> > $ sysctl -a | grep fuse.default_request_timeout >> > fs.fuse.default_request_timeout = 65535 >> > >> > $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout >> > 0 >> > >> > $ sysctl -a | grep fuse.default_request_timeout >> > fs.fuse.default_request_timeout = 0 >> > >> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> >> > Reviewed-by: Bernd Schubert <bschubert@ddn.com> >> > Reviewed-by: Josef Bacik <josef@toxicpanda.com> >> > Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> >> > --- >> > Documentation/admin-guide/sysctl/fs.rst | 25 ++++++++++++++++++++++ >> > fs/fuse/fuse_i.h | 10 +++++++++ >> > fs/fuse/inode.c | 28 +++++++++++++++++++++++-- >> > fs/fuse/sysctl.c | 24 +++++++++++++++++++++ >> > 4 files changed, 85 insertions(+), 2 deletions(-) >> > >> > diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst >> > index f5ec6c9312e1..35aeb30bed8b 100644 >> > --- a/Documentation/admin-guide/sysctl/fs.rst >> > +++ b/Documentation/admin-guide/sysctl/fs.rst >> > @@ -347,3 +347,28 @@ filesystems: >> > ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for >> > setting/getting the maximum number of pages that can be used for servicing >> > requests in FUSE. >> > + >> > +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for >> > +setting/getting the default timeout (in seconds) for a fuse server to >> > +reply to a kernel-issued request in the event where the server did not >> > +specify a timeout at mount. If the server set a timeout, >> > +then default_request_timeout will be ignored. The default >> > +"default_request_timeout" is set to 0. 0 indicates no default timeout. >> > +The maximum value that can be set is 65535. >> > + >> > +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for >> > +setting/getting the maximum timeout (in seconds) for a fuse server to >> > +reply to a kernel-issued request. A value greater than 0 automatically opts >> > +the server into a timeout that will be set to at most "max_request_timeout", >> > +even if the server did not specify a timeout and default_request_timeout is >> > +set to 0. If max_request_timeout is greater than 0 and the server set a timeout >> > +greater than max_request_timeout or default_request_timeout is set to a value >> > +greater than max_request_timeout, the system will use max_request_timeout as the >> > +timeout. 0 indicates no max request timeout. The maximum value that can be set >> > +is 65535. >> > + >> > +For timeouts, if the server does not respond to the request by the time >> > +the set timeout elapses, then the connection to the fuse server will be aborted. >> > +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but >> > +the timeout may kick in after 70 seconds). The upper margin of error for the >> > +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. >> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h >> > index 1321cc4ed2ab..e5114831798f 100644 >> > --- a/fs/fuse/fuse_i.h >> > +++ b/fs/fuse/fuse_i.h >> > @@ -49,6 +49,16 @@ extern const unsigned long fuse_timeout_timer_freq; >> > >> > /** Maximum of max_pages received in init_out */ >> > extern unsigned int fuse_max_pages_limit; >> > +/* >> > + * Default timeout (in seconds) for the server to reply to a request >> > + * before the connection is aborted, if no timeout was specified on mount. >> > + */ >> > +extern unsigned int fuse_default_req_timeout; >> > +/* >> > + * Max timeout (in seconds) for the server to reply to a request before >> > + * the connection is aborted. >> > + */ >> > +extern unsigned int fuse_max_req_timeout; >> > >> > /** List of active connections */ >> > extern struct list_head fuse_conn_list; >> > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c >> > index 79ebeb60015c..4e36d99fae52 100644 >> > --- a/fs/fuse/inode.c >> > +++ b/fs/fuse/inode.c >> > @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); >> > static int set_global_limit(const char *val, const struct kernel_param *kp); >> > >> > unsigned int fuse_max_pages_limit = 256; >> > +/* default is no timeout */ >> > +unsigned int fuse_default_req_timeout = 0; >> > +unsigned int fuse_max_req_timeout = 0; >> > >> > unsigned max_user_bgreq; >> > module_param_call(max_user_bgreq, set_global_limit, param_get_uint, >> > @@ -1268,6 +1271,24 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) >> > fuse_timeout_timer_freq); >> > } >> > >> > +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) >> > +{ >> > + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) >> > + return; >> > + >> > + if (!timeout) >> > + timeout = fuse_default_req_timeout; >> > + >> > + if (fuse_max_req_timeout) { >> > + if (timeout) >> > + timeout = min(fuse_max_req_timeout, timeout); >> >> Sorry for this late comment (this is v12 already!), but I wonder if it >> would be worth to use clamp() instead of min(). Limiting this value to >> the range [FUSE_TIMEOUT_TIMER_FREQ, fuxe_max_req_timeout] would prevent >> accidentally setting the timeout to a very low value. > > I don't think we need to clamp to FUSE_TIMEOUT_TIMER_FREQ. If the user > sets a timeout value lower than FUSE_TIMEOUT_TIMER_FREQ (15 secs), imo > that should be supported. For example, if the user sets the timeout to > 10 seconds, then the connection should be aborted if the request takes > 13 seconds. I don't see why we need to have the min bound be > FUSE_TIMEOUT_TIMER_FREQ. imo, the user-specified timeout value and > FUSE_TIMEOUT_TIMER_FREQ value are orthogonal. But I also don't feel > strongly about this and am fine with whichever way we go. Sure, my comment was just a suggestion too. I just thought it would be easy to accidentally set the timeout to '1' instead of '10', and that this very low value could cause troubles. On the other hand, I understand that 15 secondss is probably too high for using as a minimum. Anyway, this was just my 2¢. Cheers,
On Thu, Jan 23, 2025 at 1:53 PM Luis Henriques <luis@igalia.com> wrote: > > Hi Joanne, > > On Thu, Jan 23 2025, Joanne Koong wrote: > > > On Thu, Jan 23, 2025 at 1:21 AM Luis Henriques <luis@igalia.com> wrote: > >> > >> Hi Joanne, > >> > >> On Wed, Jan 22 2025, Joanne Koong wrote: > >> > >> > Introduce two new sysctls, "default_request_timeout" and > >> > "max_request_timeout". These control how long (in seconds) a server can > >> > take to reply to a request. If the server does not reply by the timeout, > >> > then the connection will be aborted. The upper bound on these sysctl > >> > values is 65535. > >> > > >> > "default_request_timeout" sets the default timeout if no timeout is > >> > specified by the fuse server on mount. 0 (default) indicates no default > >> > timeout should be enforced. If the server did specify a timeout, then > >> > default_request_timeout will be ignored. > >> > > >> > "max_request_timeout" sets the max amount of time the server may take to > >> > reply to a request. 0 (default) indicates no maximum timeout. If > >> > max_request_timeout is set and the fuse server attempts to set a > >> > timeout greater than max_request_timeout, the system will use > >> > max_request_timeout as the timeout. Similarly, if default_request_timeout > >> > is greater than max_request_timeout, the system will use > >> > max_request_timeout as the timeout. If the server does not request a > >> > timeout and default_request_timeout is set to 0 but max_request_timeout > >> > is set, then the timeout will be max_request_timeout. > >> > > >> > Please note that these timeouts are not 100% precise. The request may > >> > take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond the set max > >> > timeout due to how it's internally implemented. > >> > > >> > $ sysctl -a | grep fuse.default_request_timeout > >> > fs.fuse.default_request_timeout = 0 > >> > > >> > $ echo 65536 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >> > tee: /proc/sys/fs/fuse/default_request_timeout: Invalid argument > >> > > >> > $ echo 65535 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >> > 65535 > >> > > >> > $ sysctl -a | grep fuse.default_request_timeout > >> > fs.fuse.default_request_timeout = 65535 > >> > > >> > $ echo 0 | sudo tee /proc/sys/fs/fuse/default_request_timeout > >> > 0 > >> > > >> > $ sysctl -a | grep fuse.default_request_timeout > >> > fs.fuse.default_request_timeout = 0 > >> > > >> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > >> > Reviewed-by: Bernd Schubert <bschubert@ddn.com> > >> > Reviewed-by: Josef Bacik <josef@toxicpanda.com> > >> > Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> > >> > --- > >> > Documentation/admin-guide/sysctl/fs.rst | 25 ++++++++++++++++++++++ > >> > fs/fuse/fuse_i.h | 10 +++++++++ > >> > fs/fuse/inode.c | 28 +++++++++++++++++++++++-- > >> > fs/fuse/sysctl.c | 24 +++++++++++++++++++++ > >> > 4 files changed, 85 insertions(+), 2 deletions(-) > >> > > >> > diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst > >> > index f5ec6c9312e1..35aeb30bed8b 100644 > >> > --- a/Documentation/admin-guide/sysctl/fs.rst > >> > +++ b/Documentation/admin-guide/sysctl/fs.rst > >> > @@ -347,3 +347,28 @@ filesystems: > >> > ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for > >> > setting/getting the maximum number of pages that can be used for servicing > >> > requests in FUSE. > >> > + > >> > +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for > >> > +setting/getting the default timeout (in seconds) for a fuse server to > >> > +reply to a kernel-issued request in the event where the server did not > >> > +specify a timeout at mount. If the server set a timeout, > >> > +then default_request_timeout will be ignored. The default > >> > +"default_request_timeout" is set to 0. 0 indicates no default timeout. > >> > +The maximum value that can be set is 65535. > >> > + > >> > +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for > >> > +setting/getting the maximum timeout (in seconds) for a fuse server to > >> > +reply to a kernel-issued request. A value greater than 0 automatically opts > >> > +the server into a timeout that will be set to at most "max_request_timeout", > >> > +even if the server did not specify a timeout and default_request_timeout is > >> > +set to 0. If max_request_timeout is greater than 0 and the server set a timeout > >> > +greater than max_request_timeout or default_request_timeout is set to a value > >> > +greater than max_request_timeout, the system will use max_request_timeout as the > >> > +timeout. 0 indicates no max request timeout. The maximum value that can be set > >> > +is 65535. > >> > + > >> > +For timeouts, if the server does not respond to the request by the time > >> > +the set timeout elapses, then the connection to the fuse server will be aborted. > >> > +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but > >> > +the timeout may kick in after 70 seconds). The upper margin of error for the > >> > +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. > >> > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > >> > index 1321cc4ed2ab..e5114831798f 100644 > >> > --- a/fs/fuse/fuse_i.h > >> > +++ b/fs/fuse/fuse_i.h > >> > @@ -49,6 +49,16 @@ extern const unsigned long fuse_timeout_timer_freq; > >> > > >> > /** Maximum of max_pages received in init_out */ > >> > extern unsigned int fuse_max_pages_limit; > >> > +/* > >> > + * Default timeout (in seconds) for the server to reply to a request > >> > + * before the connection is aborted, if no timeout was specified on mount. > >> > + */ > >> > +extern unsigned int fuse_default_req_timeout; > >> > +/* > >> > + * Max timeout (in seconds) for the server to reply to a request before > >> > + * the connection is aborted. > >> > + */ > >> > +extern unsigned int fuse_max_req_timeout; > >> > > >> > /** List of active connections */ > >> > extern struct list_head fuse_conn_list; > >> > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > >> > index 79ebeb60015c..4e36d99fae52 100644 > >> > --- a/fs/fuse/inode.c > >> > +++ b/fs/fuse/inode.c > >> > @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); > >> > static int set_global_limit(const char *val, const struct kernel_param *kp); > >> > > >> > unsigned int fuse_max_pages_limit = 256; > >> > +/* default is no timeout */ > >> > +unsigned int fuse_default_req_timeout = 0; > >> > +unsigned int fuse_max_req_timeout = 0; > >> > > >> > unsigned max_user_bgreq; > >> > module_param_call(max_user_bgreq, set_global_limit, param_get_uint, > >> > @@ -1268,6 +1271,24 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) > >> > fuse_timeout_timer_freq); > >> > } > >> > > >> > +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) > >> > +{ > >> > + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) > >> > + return; > >> > + > >> > + if (!timeout) > >> > + timeout = fuse_default_req_timeout; > >> > + > >> > + if (fuse_max_req_timeout) { > >> > + if (timeout) > >> > + timeout = min(fuse_max_req_timeout, timeout); > >> > >> Sorry for this late comment (this is v12 already!), but I wonder if it > >> would be worth to use clamp() instead of min(). Limiting this value to > >> the range [FUSE_TIMEOUT_TIMER_FREQ, fuxe_max_req_timeout] would prevent > >> accidentally setting the timeout to a very low value. > > > > I don't think we need to clamp to FUSE_TIMEOUT_TIMER_FREQ. If the user > > sets a timeout value lower than FUSE_TIMEOUT_TIMER_FREQ (15 secs), imo > > that should be supported. For example, if the user sets the timeout to > > 10 seconds, then the connection should be aborted if the request takes > > 13 seconds. I don't see why we need to have the min bound be > > FUSE_TIMEOUT_TIMER_FREQ. imo, the user-specified timeout value and > > FUSE_TIMEOUT_TIMER_FREQ value are orthogonal. But I also don't feel > > strongly about this and am fine with whichever way we go. > > Sure, my comment was just a suggestion too. I just thought it would be > easy to accidentally set the timeout to '1' instead of '10', and that this > very low value could cause troubles. On the other hand, I understand that > 15 secondss is probably too high for using as a minimum. Anyway, this was > just my 2¢. > Hi Luis, i see that Miklos already merged the suggestion in. I don't feel strongly either way so I'll just leave it as is then. Thanks, Joanne > Cheers, > -- > Luis > > > > >>
On Thu, Jan 23, 2025 at 10:40 AM Bernd Schubert <bernd.schubert@fastmail.fm> wrote: > > > > On 1/23/25 19:32, Joanne Koong wrote: > > On Thu, Jan 23, 2025 at 10:06 AM Bernd Schubert > > <bernd.schubert@fastmail.fm> wrote: > >> > >> > >> > >> On 1/23/25 18:48, Joanne Koong wrote: > >>> On Thu, Jan 23, 2025 at 9:19 AM Bernd Schubert > >>> <bernd.schubert@fastmail.fm> wrote: > >>>> > >>>> Hi Joanne, > >>>> > >>>>>>> Thanks, applied and pushed with some cleanups including Luis's clamp idea. > >>>>>> > >>>>>> Hi Miklos, > >>>>>> > >>>>>> I don't think the timeouts do work with io-uring yet, I'm not sure > >>>>>> yet if I have time to work on that today or tomorrow (on something > >>>>>> else right now, I can try, but no promises). > >>>>> > >>>>> Hi Bernd, > >>>>> > >>>>> What are your thoughts on what is missing on the io-uring side for > >>>>> timeouts? If a request times out, it will abort the connection and > >>>>> AFAICT, the abort logic should already be fine for io-uring, as users > >>>>> can currently abort the connection through the sysfs interface and > >>>>> there's no internal difference in aborting through sysfs vs timeouts. > >>>>> > >>>> > >>>> in fuse_check_timeout() it iterates over each fud and then fpq. > >>>> In dev_uring.c fpq is is per queue but unrelated to fud. In current > >>>> fuse-io-uring fud is not cloned anymore - using fud won't work. > >>>> And Requests are also not queued at all on the other list > >>>> fuse_check_timeout() is currently checking. > >>> > >>> In the io-uring case, there still can be fuds and their associated > >>> fpqs given that /dev/fuse can be used still, no? So wouldn't the > >>> io-uring case still need this logic in fuse_check_timeout() for > >>> checking requests going through /dev/fuse? > >> > >> Yes, these need to be additionally checked. > >> > >>> > >>>> > >>>> Also, with a ring per core, maybe better to use > >>>> a per queue check that is core bound? I.e. zero locking overhead? > >>> > >>> How do you envision a queue check that bypasses grabbing the > >>> queue->lock? The timeout handler could be triggered on any core, so > >>> I'm not seeing how it could be core bound. > >> > >> I don't want to bypass it, but maybe each queue could have its own > >> workq and timeout checker? And then use queue_delayed_work_on()? > >> > >> > >>> > >>>> And I think we can also avoid iterating over hash lists (queue->fpq), > >>>> but can use the 'ent_in_userspace' list. > >>>> > >>>> We need to iterate over these other entry queues anyway: > >>>> > >>>> ent_w_req_queue > >>>> fuse_req_bg_queue > >>>> ent_commit_queue > >>>> > >>> > >>> Why do we need to iterate through the ent lists (ent_w_req_queue and > >>> ent_commit_queue)? AFAICT, in io-uring a request is either on the > >>> fuse_req_queue/fuse_req_bg_queue or on the fpq->processing list. Even > >>> when an entry has been queued to ent_w_req_queue or ent_commit_queue, > >>> the request itself is still queued on > >>> fuse_req_queue/fuse_req_bg_queue/fpq->processing. I'm not sure I > >>> understand why we still need to look at the ent lists? > >> > >> Yeah you are right, we could avoid ent_w_req_queue and ent_commit_queue > >> if we use fpq->processing, but processing consists of 256 lists - > >> overhead is much smaller by using the entry lists? > >> > >> > >>> > >>>> > >>>> And we also need to iterate over > >>>> > >>>> fuse_req_queue > >>>> fuse_req_bg_queue > >>> > >>> Why do we need to iterate through fuse_req_queue and > >>> fuse_req_bg_queue? fuse_uring_request_expired() checks the head of > >>> fuse_req_queue and fuse_req_bg_queue and given that requests are added > >>> to fuse_req_queue/fuse_req_bg_queue sequentially (eg added to the tail > >>> of these lists), why isn't this enough? > >> > >> I admit I'm a bit lost with that question. Aren't you pointing out > >> the same lists as I do? > >> > > > > Oh, I thought your comment was saying that we need to "iterate" over > > it (eg go through every request on the lists)? It currently already > > checks the fuse_req_queue and fuse_req_bg_queue lists (in > > fuse_uring_request_expired() which gets invoked in the > > fuse_check_timeout() timeout handler). > > > > Maybe the fuse_uring_request_expired() addition got missed - I tried > > to call this out in the cover letter changelog, as I had to rebase > > this patchset on top of the io-uring patches, so I added this function > > in to make it work for io-uring. I believe this suffices for now for > > the io uring case (with future optimizations that can be added)? > > > Ah sorry, that is me, I had missed you had already rebased it to > io-uring. > > So we are good to land this version. > Just would be good if we could optimize this soon - our test systems > have 96 cores - 24576 list heads to check... I won't manage to work > on it today and probably also not tomorrow, but by Monday I should > have an optimized version ready. > Oh wow, 96 cores!! I'll submit an optimized version of this today or tomorrow then, if that makes things easier for you. Thanks, Joanne > Thanks, > Bernd >
diff --git a/Documentation/admin-guide/sysctl/fs.rst b/Documentation/admin-guide/sysctl/fs.rst index f5ec6c9312e1..35aeb30bed8b 100644 --- a/Documentation/admin-guide/sysctl/fs.rst +++ b/Documentation/admin-guide/sysctl/fs.rst @@ -347,3 +347,28 @@ filesystems: ``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for setting/getting the maximum number of pages that can be used for servicing requests in FUSE. + +``/proc/sys/fs/fuse/default_request_timeout`` is a read/write file for +setting/getting the default timeout (in seconds) for a fuse server to +reply to a kernel-issued request in the event where the server did not +specify a timeout at mount. If the server set a timeout, +then default_request_timeout will be ignored. The default +"default_request_timeout" is set to 0. 0 indicates no default timeout. +The maximum value that can be set is 65535. + +``/proc/sys/fs/fuse/max_request_timeout`` is a read/write file for +setting/getting the maximum timeout (in seconds) for a fuse server to +reply to a kernel-issued request. A value greater than 0 automatically opts +the server into a timeout that will be set to at most "max_request_timeout", +even if the server did not specify a timeout and default_request_timeout is +set to 0. If max_request_timeout is greater than 0 and the server set a timeout +greater than max_request_timeout or default_request_timeout is set to a value +greater than max_request_timeout, the system will use max_request_timeout as the +timeout. 0 indicates no max request timeout. The maximum value that can be set +is 65535. + +For timeouts, if the server does not respond to the request by the time +the set timeout elapses, then the connection to the fuse server will be aborted. +Please note that the timeouts are not 100% precise (eg you may set 60 seconds but +the timeout may kick in after 70 seconds). The upper margin of error for the +timeout is roughly FUSE_TIMEOUT_TIMER_FREQ seconds. diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1321cc4ed2ab..e5114831798f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -49,6 +49,16 @@ extern const unsigned long fuse_timeout_timer_freq; /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; +/* + * Default timeout (in seconds) for the server to reply to a request + * before the connection is aborted, if no timeout was specified on mount. + */ +extern unsigned int fuse_default_req_timeout; +/* + * Max timeout (in seconds) for the server to reply to a request before + * the connection is aborted. + */ +extern unsigned int fuse_max_req_timeout; /** List of active connections */ extern struct list_head fuse_conn_list; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79ebeb60015c..4e36d99fae52 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -37,6 +37,9 @@ DEFINE_MUTEX(fuse_mutex); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned int fuse_max_pages_limit = 256; +/* default is no timeout */ +unsigned int fuse_default_req_timeout = 0; +unsigned int fuse_max_req_timeout = 0; unsigned max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, @@ -1268,6 +1271,24 @@ static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) fuse_timeout_timer_freq); } +static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) +{ + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) + return; + + if (!timeout) + timeout = fuse_default_req_timeout; + + if (fuse_max_req_timeout) { + if (timeout) + timeout = min(fuse_max_req_timeout, timeout); + else + timeout = fuse_max_req_timeout; + } + + set_request_timeout(fc, timeout); +} + struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; @@ -1286,6 +1307,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, ok = false; else { unsigned long ra_pages; + unsigned int timeout = 0; process_init_limits(fc, arg); @@ -1404,14 +1426,16 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) fc->io_uring = 1; - if ((flags & FUSE_REQUEST_TIMEOUT) && arg->request_timeout) - set_request_timeout(fc, arg->request_timeout); + if (flags & FUSE_REQUEST_TIMEOUT) + timeout = arg->request_timeout; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; fc->no_flock = 1; } + init_server_timeout(fc, timeout); + fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); fc->minor = arg->minor; diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c index b272bb333005..3d542ef9d889 100644 --- a/fs/fuse/sysctl.c +++ b/fs/fuse/sysctl.c @@ -13,6 +13,12 @@ static struct ctl_table_header *fuse_table_header; /* Bound by fuse_init_out max_pages, which is a u16 */ static unsigned int sysctl_fuse_max_pages_limit = 65535; +/* + * fuse_init_out request timeouts are u16. + * This goes up to ~18 hours, which is plenty for a timeout. + */ +static unsigned int sysctl_fuse_req_timeout_limit = 65535; + static struct ctl_table fuse_sysctl_table[] = { { .procname = "max_pages_limit", @@ -23,6 +29,24 @@ static struct ctl_table fuse_sysctl_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &sysctl_fuse_max_pages_limit, }, + { + .procname = "default_request_timeout", + .data = &fuse_default_req_timeout, + .maxlen = sizeof(fuse_default_req_timeout), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &sysctl_fuse_req_timeout_limit, + }, + { + .procname = "max_request_timeout", + .data = &fuse_max_req_timeout, + .maxlen = sizeof(fuse_max_req_timeout), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &sysctl_fuse_req_timeout_limit, + }, }; int fuse_sysctl_register(void)