Message ID | 20241214022827.1773071-2-joannelkoong@gmail.com (mailing list archive)
---|---
State | New
Series | fuse: add kernel-enforced request timeout option
On (24/12/13 18:28), Joanne Koong wrote: > +void fuse_check_timeout(struct work_struct *work) > +{ > + struct delayed_work *dwork = to_delayed_work(work); > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > + timeout.work); > + struct fuse_iqueue *fiq = &fc->iq; > + struct fuse_req *req; > + struct fuse_dev *fud; > + struct fuse_pqueue *fpq; > + bool expired = false; > + int i; > + > + spin_lock(&fiq->lock); > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); A nit: you can factor these out into a small helper static bool request_expired(struct fuse_conn *fc, struct list_head *list) { struct fuse_req *req; req = list_first_entry_or_null(list, struct fuse_req, list); if (!req) return false; return time_after(jiffies, req->create_time + fuse_watchdog_timeout()); } and just call it passing the corresponding list pointer abort = request_expired(fc, &fiq->pending); kinda makes the function look less busy. [..] > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > spin_unlock(&fc->lock); > > end_requests(&to_end); > + > + if (fc->timeout.req_timeout) > + cancel_delayed_work(&fc->timeout.work); When fuse_abort_conn() is called not from fuse_check_timeout(), but from somewhere else, should this use cancel_delayed_work_sync()?
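A minimal sketch of that list-based helper as it would apply to this patch, using the patch's fc->timeout.req_timeout and req->create_time fields rather than the fuse_watchdog_timeout() placeholder above (the abort_conn label and lock names are the ones from the posted fuse_check_timeout()):

static bool request_expired(struct fuse_conn *fc, struct list_head *list)
{
        struct fuse_req *req;

        /* Only the head is checked; lists are roughly ordered by creation time. */
        req = list_first_entry_or_null(list, struct fuse_req, list);
        if (!req)
                return false;

        return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
}

/* The per-queue checks in fuse_check_timeout() then shrink to: */
spin_lock(&fiq->lock);
expired = request_expired(fc, &fiq->pending);
spin_unlock(&fiq->lock);
if (expired)
        goto abort_conn;

spin_lock(&fc->bg_lock);
expired = request_expired(fc, &fc->bg_queue);
spin_unlock(&fc->bg_lock);
if (expired)
        goto abort_conn;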
On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > There are situations where fuse servers can become unresponsive or > stuck, for example if the server is deadlocked. Currently, there's no > good way to detect if a server is stuck and needs to be killed manually. > > This commit adds an option for enforcing a timeout (in seconds) for > requests where if the timeout elapses without the server responding to > the request, the connection will be automatically aborted. > > Please note that these timeouts are not 100% precise. For example, the > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > the requested timeout due to internal implementation, in order to > mitigate overhead. > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > --- > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > fs/fuse/fuse_i.h | 22 +++++++++++++ > fs/fuse/inode.c | 23 ++++++++++++++ > 3 files changed, 128 insertions(+) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 27ccae63495d..e97ba860ffcd 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -45,6 +45,85 @@ static struct fuse_dev *fuse_get_dev(struct file *file) > return READ_ONCE(file->private_data); > } > > +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req) > +{ > + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); > +} > + > +/* > + * Check if any requests aren't being completed by the time the request timeout > + * elapses. To do so, we: > + * - check the fiq pending list > + * - check the bg queue > + * - check the fpq io and processing lists > + * > + * To make this fast, we only check against the head request on each list since > + * these are generally queued in order of creation time (eg newer requests get > + * queued to the tail). We might miss a few edge cases (eg requests transitioning > + * between lists, re-sent requests at the head of the pending list having a > + * later creation time than other requests on that list, etc.) but that is fine > + * since if the request never gets fulfilled, it will eventually be caught. 
> + */ > +void fuse_check_timeout(struct work_struct *work) > +{ > + struct delayed_work *dwork = to_delayed_work(work); > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > + timeout.work); > + struct fuse_iqueue *fiq = &fc->iq; > + struct fuse_req *req; > + struct fuse_dev *fud; > + struct fuse_pqueue *fpq; > + bool expired = false; > + int i; > + > + spin_lock(&fiq->lock); > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fiq->lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->bg_lock); > + req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fc->bg_lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->lock); > + if (!fc->connected) { > + spin_unlock(&fc->lock); > + return; > + } > + list_for_each_entry(fud, &fc->devices, entry) { > + fpq = &fud->pq; > + spin_lock(&fpq->lock); > + req = list_first_entry_or_null(&fpq->io, struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + > + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) { > + req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + } > + spin_unlock(&fpq->lock); > + } > + spin_unlock(&fc->lock); > + > + queue_delayed_work(system_wq, &fc->timeout.work, > + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ)); > + return; > + > +fpq_abort: > + spin_unlock(&fpq->lock); > + spin_unlock(&fc->lock); > +abort_conn: > + fuse_abort_conn(fc); > +} > + > static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) > { > INIT_LIST_HEAD(&req->list); > @@ -53,6 +132,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) > refcount_set(&req->count, 1); > __set_bit(FR_PENDING, &req->flags); > req->fm = fm; > + req->create_time = jiffies; > } > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > spin_unlock(&fc->lock); > > end_requests(&to_end); > + > + if (fc->timeout.req_timeout) > + cancel_delayed_work(&fc->timeout.work); As Sergey pointed out, this should be a cancel_delayed_work_sync(). The workqueue job can still be running after cancel_delayed_work(), and since it requeues itself, this might not be enough to kill it completely. Also, I'd probably do this at the start of fuse_abort_conn() instead of waiting until the end. By the time you're in that function, you're killing the connection anyway, and you probably don't want the workqueue job running at the same time. They'll just end up competing for the same locks. > } else { > spin_unlock(&fc->lock); > } > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index 74744c6f2860..26eb00e5f043 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -438,6 +438,9 @@ struct fuse_req { > > /** fuse_mount this request belongs to */ > struct fuse_mount *fm; > + > + /** When (in jiffies) the request was created */ > + unsigned long create_time; > }; > > struct fuse_iqueue; > @@ -528,6 +531,17 @@ struct fuse_pqueue { > struct list_head io; > }; > > +/* Frequency (in seconds) of request timeout checks, if opted into */ > +#define FUSE_TIMEOUT_TIMER_FREQ 15 > + > +struct fuse_timeout { > + /* Worker for checking if any requests have timed out */ > + struct delayed_work work; > + > + /* Request timeout (in jiffies). 
0 = no timeout */ > + unsigned long req_timeout; > +}; > + > /** > * Fuse device instance > */ > @@ -574,6 +588,8 @@ struct fuse_fs_context { > enum fuse_dax_mode dax_mode; > unsigned int max_read; > unsigned int blksize; > + /* Request timeout (in seconds). 0 = no timeout (infinite wait) */ > + unsigned int req_timeout; > const char *subtype; > > /* DAX device, may be NULL */ > @@ -923,6 +939,9 @@ struct fuse_conn { > /** IDR for backing files ids */ > struct idr backing_files_map; > #endif > + > + /** Only used if the connection enforces request timeouts */ > + struct fuse_timeout timeout; > }; > > /* > @@ -1191,6 +1210,9 @@ void fuse_request_end(struct fuse_req *req); > void fuse_abort_conn(struct fuse_conn *fc); > void fuse_wait_aborted(struct fuse_conn *fc); > > +/* Check if any requests timed out */ > +void fuse_check_timeout(struct work_struct *work); > + > /** > * Invalidate inode attributes > */ > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index 3ce4f4e81d09..02dac88d922e 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -765,6 +765,7 @@ enum { > OPT_ALLOW_OTHER, > OPT_MAX_READ, > OPT_BLKSIZE, > + OPT_REQUEST_TIMEOUT, > OPT_ERR > }; > > @@ -779,6 +780,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { > fsparam_u32 ("max_read", OPT_MAX_READ), > fsparam_u32 ("blksize", OPT_BLKSIZE), > fsparam_string ("subtype", OPT_SUBTYPE), > + fsparam_u32 ("request_timeout", OPT_REQUEST_TIMEOUT), > {} > }; > > @@ -874,6 +876,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param) > ctx->blksize = result.uint_32; > break; > > + case OPT_REQUEST_TIMEOUT: > + ctx->req_timeout = result.uint_32; > + break; > + > default: > return -EINVAL; > } > @@ -1004,6 +1010,8 @@ void fuse_conn_put(struct fuse_conn *fc) > > if (IS_ENABLED(CONFIG_FUSE_DAX)) > fuse_dax_conn_free(fc); > + if (fc->timeout.req_timeout) > + cancel_delayed_work_sync(&fc->timeout.work); > if (fiq->ops->release) > fiq->ops->release(fiq); > put_pid_ns(fc->pid_ns); > @@ -1723,6 +1731,20 @@ int fuse_init_fs_context_submount(struct fs_context *fsc) > } > EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount); > > +static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx) > +{ > + if (ctx->req_timeout) { > + if (check_mul_overflow(ctx->req_timeout, HZ, &fc->timeout.req_timeout)) > + fc->timeout.req_timeout = ULONG_MAX; > + > + INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); > + queue_delayed_work(system_wq, &fc->timeout.work, > + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ)); > + } else { > + fc->timeout.req_timeout = 0; > + } > +} > + > int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) > { > struct fuse_dev *fud = NULL; > @@ -1785,6 +1807,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) > fc->destroy = ctx->destroy; > fc->no_control = ctx->no_control; > fc->no_force_umount = ctx->no_force_umount; > + fuse_init_fc_timeout(fc, ctx); > > err = -ENOMEM; > root = fuse_get_root_inode(sb, ctx->rootmode);
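As a worked example of the "not 100% precise" caveat in the commit message (the 60-second value below is hypothetical, not something in the patch):

/* Worked example, not code from the patch: with request_timeout=60,
 * fc->timeout.req_timeout is 60 * HZ jiffies. A request created at t=0 and
 * never answered expires at t=60s, but the checker only wakes up every
 * FUSE_TIMEOUT_TIMER_FREQ (15) seconds, so the connection is typically
 * aborted somewhere in the [60s, 75s) window, and can slip a bit further in
 * the edge cases listed above the posted fuse_check_timeout() (requests
 * migrating between lists, resent requests at the head of a list, ...). */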
On (24/12/14 07:09), Jeff Layton wrote: > > +void fuse_check_timeout(struct work_struct *work) > > +{ > > + struct delayed_work *dwork = to_delayed_work(work); > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > + timeout.work); > > + struct fuse_iqueue *fiq = &fc->iq; > > + struct fuse_req *req; > > + struct fuse_dev *fud; > > + struct fuse_pqueue *fpq; > > + bool expired = false; > > + int i; > > + [..] > > + > > +fpq_abort: > > + spin_unlock(&fpq->lock); > > + spin_unlock(&fc->lock); > > +abort_conn: > > + fuse_abort_conn(fc); > > +} > > + > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > spin_unlock(&fc->lock); > > > > end_requests(&to_end); > > + > > + if (fc->timeout.req_timeout) > > + cancel_delayed_work(&fc->timeout.work); > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). My worry here is that fuse_abort_conn() can also be called from the deferred work handler, I'm not sure if we can cancel_delayed_work_sync() from within the same WQ context, sounds deadlock-ish: WQ -> fuse_check_timeout() -> fuse_abort_conn() -> cancel_delayed_work_sync() When fuse_abort_conn() is called from somewhere else (umount, etc.) then we can safely sync(), but fuse_check_timeout() is different. Maybe fuse_abort_conn() can become __fuse_abort_conn(), which fuse_check_timeout() will call directly, for the rest fuse_abort_conn() can be something like: static void __fuse_abort_conn() { .... } void fuse_abort_conn() { cancel_delayed_work_sync() __fuse_abort_conn(); }
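Slightly fleshed out, the proposed split could look like this. This is only a sketch against the fields added by this patch, not what v10 does, and Etienne's follow-up below points out a remaining requeue race with it:

/* Sketch: the current fuse_abort_conn() body, minus the cancel, becomes the
 * internal helper that the timeout worker may call from workqueue context. */
static void __fuse_abort_conn(struct fuse_conn *fc)
{
        /* ... existing abort logic: clear fc->connected, end_requests(), ... */
}

/* External callers (umount, fusectl abort, ...) first make sure the worker
 * is neither pending nor running, so it cannot race with the abort. */
void fuse_abort_conn(struct fuse_conn *fc)
{
        if (fc->timeout.req_timeout)
                cancel_delayed_work_sync(&fc->timeout.work);
        __fuse_abort_conn(fc);
}

/* fuse_check_timeout() calls __fuse_abort_conn() directly and simply does
 * not requeue itself, avoiding a cancel_delayed_work_sync() from inside the
 * very work item it would be waiting on. */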
On Sun, 2024-12-15 at 17:25 +0900, Sergey Senozhatsky wrote: > On (24/12/14 07:09), Jeff Layton wrote: > > > +void fuse_check_timeout(struct work_struct *work) > > > +{ > > > + struct delayed_work *dwork = to_delayed_work(work); > > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > > + timeout.work); > > > + struct fuse_iqueue *fiq = &fc->iq; > > > + struct fuse_req *req; > > > + struct fuse_dev *fud; > > > + struct fuse_pqueue *fpq; > > > + bool expired = false; > > > + int i; > > > + > [..] > > > + > > > +fpq_abort: > > > + spin_unlock(&fpq->lock); > > > + spin_unlock(&fc->lock); > > > +abort_conn: > > > + fuse_abort_conn(fc); > > > +} > > > + > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > spin_unlock(&fc->lock); > > > > > > end_requests(&to_end); > > > + > > > + if (fc->timeout.req_timeout) > > > + cancel_delayed_work(&fc->timeout.work); > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). > > My worry here is that fuse_abort_conn() can also be called from the > deferred work handler, I'm not sure if we can cancel_delayed_work_sync() > from within the same WQ context, sounds deadlock-ish: > > WQ -> fuse_check_timeout() -> fuse_abort_conn() -> cancel_delayed_work_sync() > > When fuse_abort_conn() is called from somewhere else (umount, etc.) then > we can safely sync(), but fuse_check_timeout() is different. > Very good point. > Maybe fuse_abort_conn() can become __fuse_abort_conn(), which > fuse_check_timeout() will call directly, for the rest fuse_abort_conn() > can be something like: > > static void __fuse_abort_conn() > { > .... > } > > void fuse_abort_conn() > { > cancel_delayed_work_sync() > __fuse_abort_conn(); > } That seems like a reasonable solution. It already doesn't requeue the job when calling fuse_abort_conn(), so that should work.
On Sun, Dec 15, 2024 at 7:08 AM Jeff Layton <jlayton@kernel.org> wrote: > > On Sun, 2024-12-15 at 17:25 +0900, Sergey Senozhatsky wrote: > > On (24/12/14 07:09), Jeff Layton wrote: > > > > +void fuse_check_timeout(struct work_struct *work) > > > > +{ > > > > + struct delayed_work *dwork = to_delayed_work(work); > > > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > > > + timeout.work); > > > > + struct fuse_iqueue *fiq = &fc->iq; > > > > + struct fuse_req *req; > > > > + struct fuse_dev *fud; > > > > + struct fuse_pqueue *fpq; > > > > + bool expired = false; > > > > + int i; > > > > + > > [..] > > > > + > > > > +fpq_abort: > > > > + spin_unlock(&fpq->lock); > > > > + spin_unlock(&fc->lock); > > > > +abort_conn: > > > > + fuse_abort_conn(fc); > > > > +} > > > > + > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > spin_unlock(&fc->lock); > > > > > > > > end_requests(&to_end); > > > > + > > > > + if (fc->timeout.req_timeout) > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). > > > > My worry here is that fuse_abort_conn() can also be called from the > > deferred work handler, I'm not sure if we can cancel_delayed_work_sync() > > from within the same WQ context, sounds deadlock-ish: > > > > WQ -> fuse_check_timeout() -> fuse_abort_conn() -> cancel_delayed_work_sync() > > > > When fuse_abort_conn() is called from somewhere else (umount, etc.) then > > we can safely sync(), but fuse_check_timeout() is different. > > > > Very good point. > > > Maybe fuse_abort_conn() can become __fuse_abort_conn(), which > > fuse_check_timeout() will call directly, for the rest fuse_abort_conn() > > can be something like: > > > > static void __fuse_abort_conn() > > { > > .... > > } > > > > void fuse_abort_conn() > > { > > cancel_delayed_work_sync() > > __fuse_abort_conn(); > > } > > That seems like a reasonable solution. It already doesn't requeue the > job when calling fuse_abort_conn(), so that should work. > -- > Jeff Layton <jlayton@kernel.org> I'm not sure this is going to work either. What happens if say fuse_check_timeout() is running and is about to requeue the work and at the same time umount->fuse_abort_conn->cancel_delayed_work_sync() comes. The cancel will correctly wait for the actual work to finish but won't prevent it from getting queued again no? thanks, Etienne
On Fri, Dec 13, 2024 at 9:29 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > There are situations where fuse servers can become unresponsive or > stuck, for example if the server is deadlocked. Currently, there's no > good way to detect if a server is stuck and needs to be killed manually. > > This commit adds an option for enforcing a timeout (in seconds) for > requests where if the timeout elapses without the server responding to > the request, the connection will be automatically aborted. > > Please note that these timeouts are not 100% precise. For example, the > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > the requested timeout due to internal implementation, in order to > mitigate overhead. > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > --- > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > fs/fuse/fuse_i.h | 22 +++++++++++++ > fs/fuse/inode.c | 23 ++++++++++++++ > 3 files changed, 128 insertions(+) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 27ccae63495d..e97ba860ffcd 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -45,6 +45,85 @@ static struct fuse_dev *fuse_get_dev(struct file *file) > return READ_ONCE(file->private_data); > } > > +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req) > +{ > + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); > +} > + > +/* > + * Check if any requests aren't being completed by the time the request timeout > + * elapses. To do so, we: > + * - check the fiq pending list > + * - check the bg queue > + * - check the fpq io and processing lists > + * > + * To make this fast, we only check against the head request on each list since > + * these are generally queued in order of creation time (eg newer requests get > + * queued to the tail). We might miss a few edge cases (eg requests transitioning > + * between lists, re-sent requests at the head of the pending list having a > + * later creation time than other requests on that list, etc.) but that is fine > + * since if the request never gets fulfilled, it will eventually be caught. > + */ > +void fuse_check_timeout(struct work_struct *work) > +{ > + struct delayed_work *dwork = to_delayed_work(work); > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > + timeout.work); > + struct fuse_iqueue *fiq = &fc->iq; > + struct fuse_req *req; > + struct fuse_dev *fud; > + struct fuse_pqueue *fpq; > + bool expired = false; > + int i; > + > + spin_lock(&fiq->lock); > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fiq->lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->bg_lock); > + req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list); > + if (req) > + expired = request_expired(fc, req); > + spin_unlock(&fc->bg_lock); > + if (expired) > + goto abort_conn; > + > + spin_lock(&fc->lock); > + if (!fc->connected) { > + spin_unlock(&fc->lock); > + return; > + } > + list_for_each_entry(fud, &fc->devices, entry) { > + fpq = &fud->pq; > + spin_lock(&fpq->lock); Can fuse_dev_release() run concurrently to this path here? If yes say fuse_dev_release() comes in first, grab the fpq->lock and splice the fpq->processing[i] list into &to_end and release the fpq->lock which unblock this path. 
Then here we start checking req off the fpq->processing[i] list which is getting evicted on the other side by fuse_dev_release->end_requests(&to_end); Maybe we need a cancel_delayed_work_sync() at the beginning of fuse_dev_release ? Thanks Etienne > + req = list_first_entry_or_null(&fpq->io, struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + > + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) { > + req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list); > + if (req && request_expired(fc, req)) > + goto fpq_abort; > + } > + spin_unlock(&fpq->lock); > + } > + spin_unlock(&fc->lock); > + > + queue_delayed_work(system_wq, &fc->timeout.work, > + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ)); > + return; > + > +fpq_abort: > + spin_unlock(&fpq->lock); > + spin_unlock(&fc->lock); > +abort_conn: > + fuse_abort_conn(fc); > +} > +
On (24/12/15 21:16), Etienne Martineau wrote: > > > void fuse_abort_conn() > > > { > > > cancel_delayed_work_sync() > > > __fuse_abort_conn(); > > > } > > > > That seems like a reasonable solution. It already doesn't requeue the > > job when calling fuse_abort_conn(), so that should work. > > -- > > Jeff Layton <jlayton@kernel.org> > > I'm not sure this is going to work either. > What happens if say fuse_check_timeout() is running and is about to > requeue the work and > at the same time umount->fuse_abort_conn->cancel_delayed_work_sync() comes. Good point. Perhaps a flag to make en-queueing conditional then, which will be set/cleared before cancel_delayed_work_sync()?
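One possible shape for that flag, as a sketch only; the timeout.aborting field is hypothetical and not in the posted patch. (Further down the thread, Etienne's testing suggests cancel_delayed_work_sync() already copes with a self-requeueing work item, which would make such a flag redundant.)

/* Sketch: the abort path sets a flag before the sync cancel, intended to
 * close the window where a running instance rearms the timer behind it. */
void fuse_abort_conn(struct fuse_conn *fc)
{
        if (fc->timeout.req_timeout) {
                WRITE_ONCE(fc->timeout.aborting, true); /* hypothetical field */
                cancel_delayed_work_sync(&fc->timeout.work);
        }
        /* ... existing abort logic ... */
}

/* ... and the requeue at the end of fuse_check_timeout() becomes conditional: */
        if (!READ_ONCE(fc->timeout.aborting))
                queue_delayed_work(system_wq, &fc->timeout.work,
                                   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));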
On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > There are situations where fuse servers can become unresponsive or > > stuck, for example if the server is deadlocked. Currently, there's no > > good way to detect if a server is stuck and needs to be killed manually. > > > > This commit adds an option for enforcing a timeout (in seconds) for > > requests where if the timeout elapses without the server responding to > > the request, the connection will be automatically aborted. > > > > Please note that these timeouts are not 100% precise. For example, the > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > the requested timeout due to internal implementation, in order to > > mitigate overhead. > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > --- > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > fs/fuse/inode.c | 23 ++++++++++++++ > > 3 files changed, 128 insertions(+) > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > index 27ccae63495d..e97ba860ffcd 100644 > > --- a/fs/fuse/dev.c > > +++ b/fs/fuse/dev.c > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > spin_unlock(&fc->lock); > > > > end_requests(&to_end); > > + > > + if (fc->timeout.req_timeout) > > + cancel_delayed_work(&fc->timeout.work); > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > workqueue job can still be running after cancel_delayed_work(), and > since it requeues itself, this might not be enough to kill it > completely. I don't think we need to synchronously cancel it when a connection is aborted. The fuse_check_timeout() workqueue job can be simultaneously running when cancel_delayed_work() is called and can requeue itself, but then on the next trigger of the job, it will check whether the connection was aborted (eg the if (!fc->connected)... return; lines in fuse_check_timeout()) and will not requeue itself if the connection was aborted. This seemed like the simplest / cleanest approach to me. > > Also, I'd probably do this at the start of fuse_abort_conn() instead of > waiting until the end. By the time you're in that function, you're > killing the connection anyway, and you probably don't want the > workqueue job running at the same time. They'll just end up competing > for the same locks. Sounds good, I'll move this to be called right after the "if (fc->connected)" line. Thanks, Joanne > > > } else { > > spin_unlock(&fc->lock); > > } > -- > Jeff Layton <jlayton@kernel.org>
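Concretely, the plan above amounts to something like the following sketch of fuse_abort_conn(), with only the cancel placement shown and the rest of the body elided. cancel_delayed_work() does not sleep, so calling it with fc->lock held is fine:

void fuse_abort_conn(struct fuse_conn *fc)
{
        struct fuse_iqueue *fiq = &fc->iq;

        spin_lock(&fc->lock);
        if (fc->connected) {
                /* Stop the timer from rearming as early as possible. */
                if (fc->timeout.req_timeout)
                        cancel_delayed_work(&fc->timeout.work);

                /* ... then the existing teardown: fc->connected = 0, collect
                 * requests, drop fc->lock, end_requests(&to_end), wake-ups. */
        } else {
                spin_unlock(&fc->lock);
        }
}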
On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > There are situations where fuse servers can become unresponsive or > > > stuck, for example if the server is deadlocked. Currently, there's no > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > requests where if the timeout elapses without the server responding to > > > the request, the connection will be automatically aborted. > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > the requested timeout due to internal implementation, in order to > > > mitigate overhead. > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > --- > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > 3 files changed, 128 insertions(+) > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > index 27ccae63495d..e97ba860ffcd 100644 > > > --- a/fs/fuse/dev.c > > > +++ b/fs/fuse/dev.c > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > spin_unlock(&fc->lock); > > > > > > end_requests(&to_end); > > > + > > > + if (fc->timeout.req_timeout) > > > + cancel_delayed_work(&fc->timeout.work); > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > workqueue job can still be running after cancel_delayed_work(), and > > since it requeues itself, this might not be enough to kill it > > completely. > > I don't think we need to synchronously cancel it when a connection is > aborted. The fuse_check_timeout() workqueue job can be simultaneously > running when cancel_delayed_work() is called and can requeue itself, > but then on the next trigger of the job, it will check whether the > connection was aborted (eg the if (!fc->connected)... return; lines in > fuse_check_timeout()) and will not requeue itself if the connection > was aborted. This seemed like the simplest / cleanest approach to me. > Is there a scenario where the next trigger of the job dereference struct fuse_conn *fc which already got freed because say the FUSE server has terminated? Thanks, Etienne
On Sun, Dec 15, 2024 at 6:35 PM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Fri, Dec 13, 2024 at 9:29 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > There are situations where fuse servers can become unresponsive or > > stuck, for example if the server is deadlocked. Currently, there's no > > good way to detect if a server is stuck and needs to be killed manually. > > > > This commit adds an option for enforcing a timeout (in seconds) for > > requests where if the timeout elapses without the server responding to > > the request, the connection will be automatically aborted. > > > > Please note that these timeouts are not 100% precise. For example, the > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > the requested timeout due to internal implementation, in order to > > mitigate overhead. > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > --- > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > fs/fuse/inode.c | 23 ++++++++++++++ > > 3 files changed, 128 insertions(+) > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > index 27ccae63495d..e97ba860ffcd 100644 > > --- a/fs/fuse/dev.c > > +++ b/fs/fuse/dev.c > > @@ -45,6 +45,85 @@ static struct fuse_dev *fuse_get_dev(struct file *file) > > return READ_ONCE(file->private_data); > > } > > > > +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req) > > +{ > > + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); > > +} > > + > > +/* > > + * Check if any requests aren't being completed by the time the request timeout > > + * elapses. To do so, we: > > + * - check the fiq pending list > > + * - check the bg queue > > + * - check the fpq io and processing lists > > + * > > + * To make this fast, we only check against the head request on each list since > > + * these are generally queued in order of creation time (eg newer requests get > > + * queued to the tail). We might miss a few edge cases (eg requests transitioning > > + * between lists, re-sent requests at the head of the pending list having a > > + * later creation time than other requests on that list, etc.) but that is fine > > + * since if the request never gets fulfilled, it will eventually be caught. > > + */ > > +void fuse_check_timeout(struct work_struct *work) > > +{ > > + struct delayed_work *dwork = to_delayed_work(work); > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > + timeout.work); > > + struct fuse_iqueue *fiq = &fc->iq; > > + struct fuse_req *req; > > + struct fuse_dev *fud; > > + struct fuse_pqueue *fpq; > > + bool expired = false; > > + int i; > > + > > + spin_lock(&fiq->lock); > > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > > + if (req) > > + expired = request_expired(fc, req); > > + spin_unlock(&fiq->lock); > > + if (expired) > > + goto abort_conn; > > + > > + spin_lock(&fc->bg_lock); > > + req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list); > > + if (req) > > + expired = request_expired(fc, req); > > + spin_unlock(&fc->bg_lock); > > + if (expired) > > + goto abort_conn; > > + > > + spin_lock(&fc->lock); > > + if (!fc->connected) { > > + spin_unlock(&fc->lock); > > + return; > > + } > > + list_for_each_entry(fud, &fc->devices, entry) { > > + fpq = &fud->pq; > > + spin_lock(&fpq->lock); > > Can fuse_dev_release() run concurrently to this path here? 
> If yes say fuse_dev_release() comes in first, grab the fpq->lock and > splice the > fpq->processing[i] list into &to_end and release the fpq->lock which > unblock this > path. > > Then here we start checking req off the fpq->processing[i] list which is > getting evicted on the other side by fuse_dev_release->end_requests(&to_end); > > Maybe we need a cancel_delayed_work_sync() at the beginning of > fuse_dev_release ? Yes, fuse_dev_release() can run concurrently to this path here. If fuse_dev_release() comes in first, grabs the fpq->lock and splices the fpq->processing[i] lists into &to_end, then releases the fpq->lock, and then this fuse_check_timeout() grabs the fpq->lock, it'll see no requests on the fpq->processing[i] lists. When the requests are spliced onto the to_end list in fuse_dev_release(), they are removed from the &fpq->processing[i] list. For that reason I don't think we need a cancel_delayed_work_sync() at the beginning of fuse_dev_release(), but also a connection can have multiple devs associated with it and the workqueue job is per-connection and not per-device. Thanks, Joanne > Thanks > Etienne > > > + req = list_first_entry_or_null(&fpq->io, struct fuse_req, list); > > + if (req && request_expired(fc, req)) > > + goto fpq_abort; > > + > > + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) { > > + req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list); > > + if (req && request_expired(fc, req)) > > + goto fpq_abort; > > + } > > + spin_unlock(&fpq->lock); > > + } > > + spin_unlock(&fc->lock); > > + > > + queue_delayed_work(system_wq, &fc->timeout.work, > > + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ)); > > + return; > > + > > +fpq_abort: > > + spin_unlock(&fpq->lock); > > + spin_unlock(&fc->lock); > > +abort_conn: > > + fuse_abort_conn(fc); > > +} > > +
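For reference, the splice being described in fuse_dev_release() looks roughly like this (a paraphrased sketch, not the exact upstream code), which is why a concurrent fuse_check_timeout() pass simply finds the per-device lists empty:

LIST_HEAD(to_end);
unsigned int i;

spin_lock(&fpq->lock);
for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
        /* Requests are unlinked from processing[i] here, so the timeout
         * checker's list_first_entry_or_null() can no longer observe them. */
        list_splice_init(&fpq->processing[i], &to_end);
spin_unlock(&fpq->lock);

end_requests(&to_end);  /* requests are ended outside fpq->lock */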
On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > There are situations where fuse servers can become unresponsive or > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > requests where if the timeout elapses without the server responding to > > > > the request, the connection will be automatically aborted. > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > the requested timeout due to internal implementation, in order to > > > > mitigate overhead. > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > --- > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > 3 files changed, 128 insertions(+) > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > --- a/fs/fuse/dev.c > > > > +++ b/fs/fuse/dev.c > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > spin_unlock(&fc->lock); > > > > > > > > end_requests(&to_end); > > > > + > > > > + if (fc->timeout.req_timeout) > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > workqueue job can still be running after cancel_delayed_work(), and > > > since it requeues itself, this might not be enough to kill it > > > completely. > > > > I don't think we need to synchronously cancel it when a connection is > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > running when cancel_delayed_work() is called and can requeue itself, > > but then on the next trigger of the job, it will check whether the > > connection was aborted (eg the if (!fc->connected)... return; lines in > > fuse_check_timeout()) and will not requeue itself if the connection > > was aborted. This seemed like the simplest / cleanest approach to me. > > > Is there a scenario where the next trigger of the job dereference > struct fuse_conn *fc which already got freed because say the FUSE > server has terminated? This isn't possible because the struct fuse_conn *fc gets freed only after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that synchronously cancels the workqueue job. This happens in the fuse_conn_put() function. Thanks, Joanne > Thanks, > Etienne
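The lifetime argument above is the ordering in the patch's fuse_conn_put() hunk, roughly as follows (other teardown steps elided in this sketch):

void fuse_conn_put(struct fuse_conn *fc)
{
        if (refcount_dec_and_test(&fc->count)) {
                /* ... */
                if (fc->timeout.req_timeout)
                        cancel_delayed_work_sync(&fc->timeout.work);
                /* After this point the worker is neither pending nor running,
                 * so it can no longer dereference fc. */
                /* ... */
                fc->release(fc);        /* only now can fc's memory go away */
        }
}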
On Fri, Dec 13, 2024 at 10:53 PM Sergey Senozhatsky <senozhatsky@chromium.org> wrote: > > On (24/12/13 18:28), Joanne Koong wrote: > > +void fuse_check_timeout(struct work_struct *work) > > +{ > > + struct delayed_work *dwork = to_delayed_work(work); > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > + timeout.work); > > + struct fuse_iqueue *fiq = &fc->iq; > > + struct fuse_req *req; > > + struct fuse_dev *fud; > > + struct fuse_pqueue *fpq; > > + bool expired = false; > > + int i; > > + > > + spin_lock(&fiq->lock); > > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > > + if (req) > > + expired = request_expired(fc, req); > > A nit: you can factor these out into a small helper > > static bool request_expired(struct fuse_conn *fc, struct list_head *list) > { > struct fuse_req *req; > > req = list_first_entry_or_null(list, struct fuse_req, list); > if (!req) > return false; > return time_after(jiffies, req->create_time + fuse_watchdog_timeout()); > } > > and just call it passing the corresponding list pointer > > abort = request_expired(fc, &fiq->pending); > > kinda makes the function look less busy. Good idea! I'll do this refactoring as part of v11. > > [..] > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > spin_unlock(&fc->lock); > > > > end_requests(&to_end); > > + > > + if (fc->timeout.req_timeout) > > + cancel_delayed_work(&fc->timeout.work); > > When fuse_abort_conn() is called not from fuse_check_timeout(), but from > somewhere else, should this use cancel_delayed_work_sync()? I left a comment about this under the reply to Jeff. Thanks, Joanne
On Mon, Dec 16, 2024 at 1:15 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > On Sun, Dec 15, 2024 at 6:35 PM Etienne Martineau > <etmartin4313@gmail.com> wrote: > > > > On Fri, Dec 13, 2024 at 9:29 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > There are situations where fuse servers can become unresponsive or > > > stuck, for example if the server is deadlocked. Currently, there's no > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > requests where if the timeout elapses without the server responding to > > > the request, the connection will be automatically aborted. > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > the requested timeout due to internal implementation, in order to > > > mitigate overhead. > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > --- > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > 3 files changed, 128 insertions(+) > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > index 27ccae63495d..e97ba860ffcd 100644 > > > --- a/fs/fuse/dev.c > > > +++ b/fs/fuse/dev.c > > > @@ -45,6 +45,85 @@ static struct fuse_dev *fuse_get_dev(struct file *file) > > > return READ_ONCE(file->private_data); > > > } > > > > > > +static bool request_expired(struct fuse_conn *fc, struct fuse_req *req) > > > +{ > > > + return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); > > > +} > > > + > > > +/* > > > + * Check if any requests aren't being completed by the time the request timeout > > > + * elapses. To do so, we: > > > + * - check the fiq pending list > > > + * - check the bg queue > > > + * - check the fpq io and processing lists > > > + * > > > + * To make this fast, we only check against the head request on each list since > > > + * these are generally queued in order of creation time (eg newer requests get > > > + * queued to the tail). We might miss a few edge cases (eg requests transitioning > > > + * between lists, re-sent requests at the head of the pending list having a > > > + * later creation time than other requests on that list, etc.) but that is fine > > > + * since if the request never gets fulfilled, it will eventually be caught. 
> > > + */ > > > +void fuse_check_timeout(struct work_struct *work) > > > +{ > > > + struct delayed_work *dwork = to_delayed_work(work); > > > + struct fuse_conn *fc = container_of(dwork, struct fuse_conn, > > > + timeout.work); > > > + struct fuse_iqueue *fiq = &fc->iq; > > > + struct fuse_req *req; > > > + struct fuse_dev *fud; > > > + struct fuse_pqueue *fpq; > > > + bool expired = false; > > > + int i; > > > + > > > + spin_lock(&fiq->lock); > > > + req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list); > > > + if (req) > > > + expired = request_expired(fc, req); > > > + spin_unlock(&fiq->lock); > > > + if (expired) > > > + goto abort_conn; > > > + > > > + spin_lock(&fc->bg_lock); > > > + req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list); > > > + if (req) > > > + expired = request_expired(fc, req); > > > + spin_unlock(&fc->bg_lock); > > > + if (expired) > > > + goto abort_conn; > > > + > > > + spin_lock(&fc->lock); > > > + if (!fc->connected) { > > > + spin_unlock(&fc->lock); > > > + return; > > > + } > > > + list_for_each_entry(fud, &fc->devices, entry) { > > > + fpq = &fud->pq; > > > + spin_lock(&fpq->lock); > > > > Can fuse_dev_release() run concurrently to this path here? > > If yes say fuse_dev_release() comes in first, grab the fpq->lock and > > splice the > > fpq->processing[i] list into &to_end and release the fpq->lock which > > unblock this > > path. > > > > Then here we start checking req off the fpq->processing[i] list which is > > getting evicted on the other side by fuse_dev_release->end_requests(&to_end); > > > > Maybe we need a cancel_delayed_work_sync() at the beginning of > > fuse_dev_release ? > > Yes, fuse_dev_release() can run concurrently to this path here. If > fuse_dev_release() comes in first, grabs the fpq->lock and splices the > fpq->processing[i] lists into &to_end, then releases the fpq->lock, > and then this fuse_check_timeout() grabs the fpq->lock, it'll see no > requests on the fpq->processing[i] lists. When the requests are > spliced onto the to_end list in fuse_dev_release(), they are removed > from the &fpq->processing[i] list. Yes, good point about list splice. After all, I realized that the same locking sequence is present in fuse_abort_conn() which is proven to work. ( otherwise we would have heard about race issues coming from fuse_dev_release() against concurrent fuse_conn_abort_write() ) > For that reason I don't think we need a cancel_delayed_work_sync() at > the beginning of fuse_dev_release(), but also a connection can have > multiple devs associated with it and the workqueue job is > per-connection and not per-device. Ok got it. Thanks, Etienne > > > Thanks, > Joanne > > > Thanks > > Etienne > > > > > + req = list_first_entry_or_null(&fpq->io, struct fuse_req, list); > > > + if (req && request_expired(fc, req)) > > > + goto fpq_abort; > > > + > > > + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) { > > > + req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list); > > > + if (req && request_expired(fc, req)) > > > + goto fpq_abort; > > > + } > > > + spin_unlock(&fpq->lock); > > > + } > > > + spin_unlock(&fc->lock); > > > + > > > + queue_delayed_work(system_wq, &fc->timeout.work, > > > + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ)); > > > + return; > > > + > > > +fpq_abort: > > > + spin_unlock(&fpq->lock); > > > + spin_unlock(&fc->lock); > > > +abort_conn: > > > + fuse_abort_conn(fc); > > > +} > > > +
On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > <etmartin4313@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > There are situations where fuse servers can become unresponsive or > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > requests where if the timeout elapses without the server responding to > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > the requested timeout due to internal implementation, in order to > > > > > mitigate overhead. > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > --- > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > --- a/fs/fuse/dev.c > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > spin_unlock(&fc->lock); > > > > > > > > > > end_requests(&to_end); > > > > > + > > > > > + if (fc->timeout.req_timeout) > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > since it requeues itself, this might not be enough to kill it > > > > completely. > > > > > > I don't think we need to synchronously cancel it when a connection is > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > running when cancel_delayed_work() is called and can requeue itself, > > > but then on the next trigger of the job, it will check whether the > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > fuse_check_timeout()) and will not requeue itself if the connection > > > was aborted. This seemed like the simplest / cleanest approach to me. > > > > > Is there a scenario where the next trigger of the job dereference > > struct fuse_conn *fc which already got freed because say the FUSE > > server has terminated? > > This isn't possible because the struct fuse_conn *fc gets freed only > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that > synchronously cancels the workqueue job. This happens in the > fuse_conn_put() function. > cancel_delayed_work_sync() won't prevent the work from re-queuing itself if it's already running. I think we need some flag like Sergey pointed out here https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540 Maybe we don't requeue when fc->count becomes 0? 
Thanks, Etienne > > Thanks, > Joanne >
On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > <etmartin4313@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > requests where if the timeout elapses without the server responding to > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > the requested timeout due to internal implementation, in order to > > > > > > mitigate overhead. > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > --- > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > --- a/fs/fuse/dev.c > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > end_requests(&to_end); > > > > > > + > > > > > > + if (fc->timeout.req_timeout) > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > since it requeues itself, this might not be enough to kill it > > > > > completely. > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > but then on the next trigger of the job, it will check whether the > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > was aborted. This seemed like the simplest / cleanest approach to me. > > > > > > > Is there a scenario where the next trigger of the job dereference > > > struct fuse_conn *fc which already got freed because say the FUSE > > > server has terminated? > > > > This isn't possible because the struct fuse_conn *fc gets freed only > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that > > synchronously cancels the workqueue job. This happens in the > > fuse_conn_put() function. > > > cancel_delayed_work_sync() won't prevent the work from re-queuing > itself if it's already running. 
> I think we need some flag like Sergey pointed out here > https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540 > Maybe we don't requeue when fc->count becomes 0? The connection will have been aborted when cancel_delayed_work_sync() is called (otherwise we will have a lot of memory crashes/leaks). If the fuse_check_timeout() workqueue job is running while cancel_delayed_work_sync() is called, there's the "if (!fc->connected) { ... return; }" path that returns and avoids requeueing. Thanks, Joanne > Thanks, > Etienne > > > > Thanks, > > Joanne > >
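The guard being relied on is the early return already in the posted fuse_check_timeout(); annotated:

        /* From the patch: fuse_abort_conn() clears fc->connected under
         * fc->lock, so a run that was already in flight can requeue itself
         * once more, but the next run takes this branch and stops. That is
         * why a plain cancel_delayed_work() in the abort path is considered
         * enough, while the cancel_delayed_work_sync() in fuse_conn_put()
         * guarantees nothing is pending or running once fc is freed. */
        spin_lock(&fc->lock);
        if (!fc->connected) {
                spin_unlock(&fc->lock);
                return;
        }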
On Mon, Dec 16, 2024 at 8:26 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau > <etmartin4313@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > > <etmartin4313@gmail.com> wrote: > > > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > > requests where if the timeout elapses without the server responding to > > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > > the requested timeout due to internal implementation, in order to > > > > > > > mitigate overhead. > > > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > > --- > > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > > --- a/fs/fuse/dev.c > > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > > > end_requests(&to_end); > > > > > > > + > > > > > > > + if (fc->timeout.req_timeout) > > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > > since it requeues itself, this might not be enough to kill it > > > > > > completely. > > > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > > but then on the next trigger of the job, it will check whether the > > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > > was aborted. This seemed like the simplest / cleanest approach to me. > > > > > > > > > Is there a scenario where the next trigger of the job dereference > > > > struct fuse_conn *fc which already got freed because say the FUSE > > > > server has terminated? 
> > > > > > This isn't possible because the struct fuse_conn *fc gets freed only > > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that > > > synchronously cancels the workqueue job. This happens in the > > > fuse_conn_put() function. > > > > > cancel_delayed_work_sync() won't prevent the work from re-queuing > > itself if it's already running. > > I think we need some flag like Sergey pointed out here > > https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540 > > Maybe we don't requeue when fc->count becomes 0? > > The connection will have been aborted when cancel_delayed_work_sync() > is called (otherwise we will have a lot of memory crashes/leaks). If > the fuse_check_timeout() workqueue job is running while > cancel_delayed_work_sync() is called, there's the "if (!fc->connected) > { ... return; }" path that returns and avoids requeueing. > I ran some tests and from what I see, calling cancel_delayed_work_sync() on a workqueue that is currently running and re-queueing itself is enough to kill it completely. For that reason I believe we don't even need the cancel_delayed_work() in fuse_abort_conn() because everything is taken care of by fuse_conn_put(); thanks, Etienne > > Thanks, > Joanne
On Tue, Dec 17, 2024 at 12:02 PM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 8:26 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau > > <etmartin4313@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > > > <etmartin4313@gmail.com> wrote: > > > > > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > > > requests where if the timeout elapses without the server responding to > > > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > > > the requested timeout due to internal implementation, in order to > > > > > > > > mitigate overhead. > > > > > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > > > --- > > > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > > > --- a/fs/fuse/dev.c > > > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > > > > > end_requests(&to_end); > > > > > > > > + > > > > > > > > + if (fc->timeout.req_timeout) > > > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > > > since it requeues itself, this might not be enough to kill it > > > > > > > completely. > > > > > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > > > but then on the next trigger of the job, it will check whether the > > > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > > > was aborted. This seemed like the simplest / cleanest approach to me. 
> > > > >
> > > > > Is there a scenario where the next trigger of the job dereference
> > > > > struct fuse_conn *fc which already got freed because say the FUSE
> > > > > server has terminated?
> > > >
> > > > This isn't possible because the struct fuse_conn *fc gets freed only
> > > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that
> > > > synchronously cancels the workqueue job. This happens in the
> > > > fuse_conn_put() function.
> > > >
> > > cancel_delayed_work_sync() won't prevent the work from re-queuing
> > > itself if it's already running.
> > > I think we need some flag like Sergey pointed out here
> > > https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540
> > > Maybe we don't requeue when fc->count becomes 0?
> >
> > The connection will have been aborted when cancel_delayed_work_sync()
> > is called (otherwise we will have a lot of memory crashes/leaks). If
> > the fuse_check_timeout() workqueue job is running while
> > cancel_delayed_work_sync() is called, there's the "if (!fc->connected)
> > { ... return; }" path that returns and avoids requeueing.
> >
> I ran some tests and from what I see, calling
> cancel_delayed_work_sync() on a workqueue that is currently running
> and re-queueing itself is enough to kill it completely. For that
> reason I believe we don't even need the cancel_delayed_work() in
> fuse_abort_conn() because everything is taken care of by
> fuse_conn_put();

I think the cancel_delayed_work() in fuse_abort_conn() would still be
good to have. There are some instances where the connection gets
aborted but the connection doesn't get freed (eg user forgets to
unmount the fuse filesystem or the unmount only happens a lot later).
When the connection is aborted however, this will automatically cancel
the workqueue job on the next run (on the next run, the job won't
requeue itself if it sees that the connection was aborted) so we
technically don't need the cancel_delayed_work() because of this, but
imo it'd be good to minimize the number of workqueue jobs that get run
and canceling it asap is preferable.

Thanks,
Joanne

> thanks,
> Etienne
> >
> > Thanks,
> > Joanne
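To summarize the lifetime argument above: the pattern under discussion is a
self-re-arming delayed work that stops re-queueing once its owner is marked
dead, gets a best-effort cancel_delayed_work() on abort, and a final
cancel_delayed_work_sync() before the owning structure is freed. The sketch
below is generic illustrative code, not an extract from the patch; the fuse
equivalents are fc->connected, fuse_abort_conn() and fuse_conn_put().

#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/slab.h>

struct owner {
        struct delayed_work work;
        bool alive;             /* cleared on abort; real code does this under a lock */
};

static void owner_timer_fn(struct work_struct *work)
{
        struct owner *o = container_of(to_delayed_work(work), struct owner, work);

        if (!o->alive)          /* aborted: do not re-arm, the job dies here */
                return;

        /* ... scan for expired work ... */

        queue_delayed_work(system_wq, &o->work, 15 * HZ);       /* re-arm */
}

static void owner_abort(struct owner *o)
{
        o->alive = false;
        /* best effort: a concurrently running instance may re-arm once more,
         * but that next run sees !o->alive and exits without re-queueing */
        cancel_delayed_work(&o->work);
}

static void owner_free(struct owner *o)
{
        /* waits for a running instance and removes any pending re-arm,
         * so the work can no longer touch *o after this returns */
        cancel_delayed_work_sync(&o->work);
        kfree(o);
}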
On Tue, Dec 17, 2024 at 3:37 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > On Tue, Dec 17, 2024 at 12:02 PM Etienne Martineau > <etmartin4313@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 8:26 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau > > > <etmartin4313@gmail.com> wrote: > > > > > > > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > > > > <etmartin4313@gmail.com> wrote: > > > > > > > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > > > > requests where if the timeout elapses without the server responding to > > > > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > > > > the requested timeout due to internal implementation, in order to > > > > > > > > > mitigate overhead. > > > > > > > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > > > > --- > > > > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > > > > --- a/fs/fuse/dev.c > > > > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > > > > > > > end_requests(&to_end); > > > > > > > > > + > > > > > > > > > + if (fc->timeout.req_timeout) > > > > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > > > > since it requeues itself, this might not be enough to kill it > > > > > > > > completely. > > > > > > > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > > > > but then on the next trigger of the job, it will check whether the > > > > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > > > > was aborted. 
This seemed like the simplest / cleanest approach to me.
> > > > > >
> > > > > > Is there a scenario where the next trigger of the job dereference
> > > > > > struct fuse_conn *fc which already got freed because say the FUSE
> > > > > > server has terminated?
> > > > >
> > > > > This isn't possible because the struct fuse_conn *fc gets freed only
> > > > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that
> > > > > synchronously cancels the workqueue job. This happens in the
> > > > > fuse_conn_put() function.
> > > > >
> > > > cancel_delayed_work_sync() won't prevent the work from re-queuing
> > > > itself if it's already running.
> > > > I think we need some flag like Sergey pointed out here
> > > > https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540
> > > > Maybe we don't requeue when fc->count becomes 0?
> > >
> > > The connection will have been aborted when cancel_delayed_work_sync()
> > > is called (otherwise we will have a lot of memory crashes/leaks). If
> > > the fuse_check_timeout() workqueue job is running while
> > > cancel_delayed_work_sync() is called, there's the "if (!fc->connected)
> > > { ... return; }" path that returns and avoids requeueing.
> > >
> > I ran some tests and from what I see, calling
> > cancel_delayed_work_sync() on a workqueue that is currently running
> > and re-queueing itself is enough to kill it completely. For that
> > reason I believe we don't even need the cancel_delayed_work() in
> > fuse_abort_conn() because everything is taken care of by
> > fuse_conn_put();
>
> I think the cancel_delayed_work() in fuse_abort_conn() would still be
> good to have. There are some instances where the connection gets
> aborted but the connection doesn't get freed (eg user forgets to
> unmount the fuse filesystem or the unmount only happens a lot later).
> When the connection is aborted however, this will automatically cancel
> the workqueue job on the next run (on the next run, the job won't
> requeue itself if it sees that the connection was aborted) so we
> technically don't need the cancel_delayed_work() because of this, but
> imo it'd be good to minimize the number of workqueue jobs that get run
> and canceling it asap is preferable.
>
Ok, it makes sense.
Also in fuse_check_timeout() does it make sense to leverage
fc->num_waiting to save some cycles in the function?

Something like:

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e97ba860ffcd..344af61124f4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -97,6 +97,10 @@ void fuse_check_timeout(struct work_struct *work)
                 spin_unlock(&fc->lock);
                 return;
         }
+        if (!fc->num_waiting){
+                spin_unlock(&fc->lock);
+                goto out;
+        }
         list_for_each_entry(fud, &fc->devices, entry) {
                 fpq = &fud->pq;
                 spin_lock(&fpq->lock);
@@ -113,6 +117,7 @@ void fuse_check_timeout(struct work_struct *work)
         }
         spin_unlock(&fc->lock);
 
+out:
         queue_delayed_work(system_wq, &fc->timeout.work,
                            secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
         return;

thanks
Etienne

>
> Thanks,
> Joanne
>
On Wed, Dec 18, 2024 at 7:32 AM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Tue, Dec 17, 2024 at 3:37 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > On Tue, Dec 17, 2024 at 12:02 PM Etienne Martineau > > <etmartin4313@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 8:26 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau > > > > <etmartin4313@gmail.com> wrote: > > > > > > > > > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > > > > > <etmartin4313@gmail.com> wrote: > > > > > > > > > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > > > > > requests where if the timeout elapses without the server responding to > > > > > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > > > > > the requested timeout due to internal implementation, in order to > > > > > > > > > > mitigate overhead. > > > > > > > > > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > > > > > --- > > > > > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > > > > > --- a/fs/fuse/dev.c > > > > > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > > > > > > > > > end_requests(&to_end); > > > > > > > > > > + > > > > > > > > > > + if (fc->timeout.req_timeout) > > > > > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > > > > > since it requeues itself, this might not be enough to kill it > > > > > > > > > completely. > > > > > > > > > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > > > > > aborted. 
The fuse_check_timeout() workqueue job can be simultaneously > > > > > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > > > > > but then on the next trigger of the job, it will check whether the > > > > > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > > > > > was aborted. This seemed like the simplest / cleanest approach to me. > > > > > > > > > > > > > > > Is there a scenario where the next trigger of the job dereference > > > > > > > struct fuse_conn *fc which already got freed because say the FUSE > > > > > > > server has terminated? > > > > > > > > > > > > This isn't possible because the struct fuse_conn *fc gets freed only > > > > > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that > > > > > > synchronously cancels the workqueue job. This happens in the > > > > > > fuse_conn_put() function. > > > > > > > > > > > cancel_delayed_work_sync() won't prevent the work from re-queuing > > > > > itself if it's already running. > > > > > I think we need some flag like Sergey pointed out here > > > > > https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540 > > > > > Maybe we don't requeue when fc->count becomes 0? > > > > > > > > The connection will have been aborted when cancel_delayed_work_sync() > > > > is called (otherwise we will have a lot of memory crashes/leaks). If > > > > the fuse_check_timeout() workqueue job is running while > > > > cancel_delayed_work_sync() is called, there's the "if (!fc->connected) > > > > { ... return; }" path that returns and avoids requeueing. > > > > > > > I ran some tests and from what I see, calling > > > cancel_delayed_work_sync() on a workqueue that is currently running > > > and re-queueing itself is enough to kill it completely. For that > > > reason I believe we don't even need the cancel_delayed_work() in > > > fuse_abort_conn() because everything is taken care of by > > > fuse_conn_put(); > > > > I think the cancel_delayed_work() in fuse_abort_conn() would still be > > good to have. There are some instances where the connection gets > > aborted but the connection doesn't get freed (eg user forgets to > > unmount the fuse filesystem or the unmount only happens a lot later). > > When the connection is aborted however, this will automatically cancel > > the workqueue job on the next run (on the next run, the job won't > > requeue itself if it sees that the connection was aborted) so we > > technically don't need the cancel_delayed_work() because of this, but > > imo it'd be good to minimize the number of workqueue jobs that get run > > and canceling it asap is preferable. > > > Ok, it makes sense. > Also in fuse_check_timeout() does it make sense to leverage > fc->num_waiting to save some cycle in the function? 
> Something like:
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index e97ba860ffcd..344af61124f4 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -97,6 +97,10 @@ void fuse_check_timeout(struct work_struct *work)
>                  spin_unlock(&fc->lock);
>                  return;
>          }
> +        if (!fc->num_waiting){
> +                spin_unlock(&fc->lock);
> +                goto out;
> +        }
>          list_for_each_entry(fud, &fc->devices, entry) {
>                  fpq = &fud->pq;
>                  spin_lock(&fpq->lock);
> @@ -113,6 +117,7 @@ void fuse_check_timeout(struct work_struct *work)
>          }
>          spin_unlock(&fc->lock);
>
> +out:
>          queue_delayed_work(system_wq, &fc->timeout.work,
>                             secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
>          return;
>

I like this idea and it makes sense to me. I think "fc->num_waiting"
needs to be atomically read though, which doesn't depend on holding the
fc lock, so we could do this check at the top of the function before
grabbing the fiq lock and checking any expirations.

I'll incorporate this into v11.

Thanks,
Joanne

> thanks
> Etienne
> >
> > Thanks,
> > Joanne
> >
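One possible shape of that early exit (a sketch only, not the actual v11
change) is shown below. It assumes fc->num_waiting is the existing atomic_t
counter of outstanding requests in struct fuse_conn, which is also why the
suggested "if (!fc->num_waiting)" has to go through atomic_read(): an
atomic_t cannot be tested directly as a boolean. The middle of the function
is elided; only the early exit and the re-arm are shown.

void fuse_check_timeout(struct work_struct *work)
{
        struct fuse_conn *fc = container_of(to_delayed_work(work),
                                            struct fuse_conn, timeout.work);

        /* No outstanding requests: nothing can have timed out, so skip all
         * of the lock acquisitions and list walks and just re-arm. */
        if (!atomic_read(&fc->num_waiting))
                goto requeue;

        /* ... existing pending / bg / fpq checks from the patch ... */

requeue:
        queue_delayed_work(system_wq, &fc->timeout.work,
                           secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
}

Re-arming rather than returning keeps the watchdog alive for requests that
are submitted later, matching the "goto out" in the suggestion above.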
On Mon, Dec 16, 2024 at 2:09 PM Etienne Martineau <etmartin4313@gmail.com> wrote: > > On Mon, Dec 16, 2024 at 1:21 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > On Mon, Dec 16, 2024 at 9:51 AM Etienne Martineau > > <etmartin4313@gmail.com> wrote: > > > > > > On Mon, Dec 16, 2024 at 12:32 PM Joanne Koong <joannelkoong@gmail.com> wrote: > > > > > > > > On Sat, Dec 14, 2024 at 4:10 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > > > On Fri, 2024-12-13 at 18:28 -0800, Joanne Koong wrote: > > > > > > There are situations where fuse servers can become unresponsive or > > > > > > stuck, for example if the server is deadlocked. Currently, there's no > > > > > > good way to detect if a server is stuck and needs to be killed manually. > > > > > > > > > > > > This commit adds an option for enforcing a timeout (in seconds) for > > > > > > requests where if the timeout elapses without the server responding to > > > > > > the request, the connection will be automatically aborted. > > > > > > > > > > > > Please note that these timeouts are not 100% precise. For example, the > > > > > > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond > > > > > > the requested timeout due to internal implementation, in order to > > > > > > mitigate overhead. > > > > > > > > > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > > > > > --- > > > > > > fs/fuse/dev.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > > > > fs/fuse/fuse_i.h | 22 +++++++++++++ > > > > > > fs/fuse/inode.c | 23 ++++++++++++++ > > > > > > 3 files changed, 128 insertions(+) > > > > > > > > > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > > > > > index 27ccae63495d..e97ba860ffcd 100644 > > > > > > --- a/fs/fuse/dev.c > > > > > > +++ b/fs/fuse/dev.c > > > > > > > > > > > > static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) > > > > > > @@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc) > > > > > > spin_unlock(&fc->lock); > > > > > > > > > > > > end_requests(&to_end); > > > > > > + > > > > > > + if (fc->timeout.req_timeout) > > > > > > + cancel_delayed_work(&fc->timeout.work); > > > > > > > > > > As Sergey pointed out, this should be a cancel_delayed_work_sync(). The > > > > > workqueue job can still be running after cancel_delayed_work(), and > > > > > since it requeues itself, this might not be enough to kill it > > > > > completely. > > > > > > > > I don't think we need to synchronously cancel it when a connection is > > > > aborted. The fuse_check_timeout() workqueue job can be simultaneously > > > > running when cancel_delayed_work() is called and can requeue itself, > > > > but then on the next trigger of the job, it will check whether the > > > > connection was aborted (eg the if (!fc->connected)... return; lines in > > > > fuse_check_timeout()) and will not requeue itself if the connection > > > > was aborted. This seemed like the simplest / cleanest approach to me. > > > > > > > Is there a scenario where the next trigger of the job dereference > > > struct fuse_conn *fc which already got freed because say the FUSE > > > server has terminated? > > > > This isn't possible because the struct fuse_conn *fc gets freed only > > after the call to "cancel_delayed_work_sync(&fc->timeout.work);" that > > synchronously cancels the workqueue job. This happens in the > > fuse_conn_put() function. > > > cancel_delayed_work_sync() won't prevent the work from re-queuing > itself if it's already running. 
Also btw, I think cancel_delayed_work_sync() does actually prevent the
work from re-queuing itself if it's already running. The api comment
(in kernel/workqueue.c) says:

 * Cancel @work and wait for its execution to finish. This function can be used
 * even if the work re-queues itself or migrates to another workqueue. On return
 * from this function, @work is guaranteed to be not pending or executing on any
 * CPU as long as there aren't racing enqueues.

> I think we need some flag like Sergey pointed out here
> https://lore.kernel.org/linux-fsdevel/CAMHPp_S2ANAguT6fYfNcXjTZxU14nh2Zv=5=8dG8qUnD3F8e7A@mail.gmail.com/T/#m543550031f31a9210996ccf815d5bc2a4290f540
> Maybe we don't requeue when fc->count becomes 0?
> Thanks,
> Etienne
> >
> > Thanks,
> > Joanne
> >
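A toy illustration of that guarantee (generic code, not fuse code): once
started, the work item below is its own only enqueuer, so there are no
"racing enqueues" in the sense of the quoted comment, and a single
cancel_delayed_work_sync() fully quiesces it even though it re-arms itself.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct delayed_work toy_work;

static void toy_fn(struct work_struct *work)
{
        /* pretend to do some periodic scanning, then re-arm */
        queue_delayed_work(system_wq, to_delayed_work(work), HZ);
}

static void toy_start(void)
{
        INIT_DELAYED_WORK(&toy_work, toy_fn);
        queue_delayed_work(system_wq, &toy_work, HZ);
}

static void toy_stop(void)
{
        /* After this returns, toy_fn is neither pending nor running on any
         * CPU, because no path other than toy_fn itself queues the work. */
        cancel_delayed_work_sync(&toy_work);
}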
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 27ccae63495d..e97ba860ffcd 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -45,6 +45,85 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
         return READ_ONCE(file->private_data);
 }
 
+static bool request_expired(struct fuse_conn *fc, struct fuse_req *req)
+{
+        return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
+}
+
+/*
+ * Check if any requests aren't being completed by the time the request timeout
+ * elapses. To do so, we:
+ * - check the fiq pending list
+ * - check the bg queue
+ * - check the fpq io and processing lists
+ *
+ * To make this fast, we only check against the head request on each list since
+ * these are generally queued in order of creation time (eg newer requests get
+ * queued to the tail). We might miss a few edge cases (eg requests transitioning
+ * between lists, re-sent requests at the head of the pending list having a
+ * later creation time than other requests on that list, etc.) but that is fine
+ * since if the request never gets fulfilled, it will eventually be caught.
+ */
+void fuse_check_timeout(struct work_struct *work)
+{
+        struct delayed_work *dwork = to_delayed_work(work);
+        struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
+                                            timeout.work);
+        struct fuse_iqueue *fiq = &fc->iq;
+        struct fuse_req *req;
+        struct fuse_dev *fud;
+        struct fuse_pqueue *fpq;
+        bool expired = false;
+        int i;
+
+        spin_lock(&fiq->lock);
+        req = list_first_entry_or_null(&fiq->pending, struct fuse_req, list);
+        if (req)
+                expired = request_expired(fc, req);
+        spin_unlock(&fiq->lock);
+        if (expired)
+                goto abort_conn;
+
+        spin_lock(&fc->bg_lock);
+        req = list_first_entry_or_null(&fc->bg_queue, struct fuse_req, list);
+        if (req)
+                expired = request_expired(fc, req);
+        spin_unlock(&fc->bg_lock);
+        if (expired)
+                goto abort_conn;
+
+        spin_lock(&fc->lock);
+        if (!fc->connected) {
+                spin_unlock(&fc->lock);
+                return;
+        }
+        list_for_each_entry(fud, &fc->devices, entry) {
+                fpq = &fud->pq;
+                spin_lock(&fpq->lock);
+                req = list_first_entry_or_null(&fpq->io, struct fuse_req, list);
+                if (req && request_expired(fc, req))
+                        goto fpq_abort;
+
+                for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
+                        req = list_first_entry_or_null(&fpq->processing[i], struct fuse_req, list);
+                        if (req && request_expired(fc, req))
+                                goto fpq_abort;
+                }
+                spin_unlock(&fpq->lock);
+        }
+        spin_unlock(&fc->lock);
+
+        queue_delayed_work(system_wq, &fc->timeout.work,
+                           secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
+        return;
+
+fpq_abort:
+        spin_unlock(&fpq->lock);
+        spin_unlock(&fc->lock);
+abort_conn:
+        fuse_abort_conn(fc);
+}
+
 static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 {
         INIT_LIST_HEAD(&req->list);
@@ -53,6 +132,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
         refcount_set(&req->count, 1);
         __set_bit(FR_PENDING, &req->flags);
         req->fm = fm;
+        req->create_time = jiffies;
 }
 
 static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
@@ -2308,6 +2388,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
                 spin_unlock(&fc->lock);
 
                 end_requests(&to_end);
+
+                if (fc->timeout.req_timeout)
+                        cancel_delayed_work(&fc->timeout.work);
         } else {
                 spin_unlock(&fc->lock);
         }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 74744c6f2860..26eb00e5f043 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -438,6 +438,9 @@ struct fuse_req {
 
         /** fuse_mount this request belongs to */
         struct fuse_mount *fm;
+
+        /** When (in jiffies) the request was created */
+        unsigned long create_time;
 };
 
 struct fuse_iqueue;
@@ -528,6 +531,17 @@ struct fuse_pqueue {
         struct list_head io;
 };
 
+/* Frequency (in seconds) of request timeout checks, if opted into */
+#define FUSE_TIMEOUT_TIMER_FREQ 15
+
+struct fuse_timeout {
+        /* Worker for checking if any requests have timed out */
+        struct delayed_work work;
+
+        /* Request timeout (in jiffies). 0 = no timeout */
+        unsigned long req_timeout;
+};
+
 /**
  * Fuse device instance
  */
@@ -574,6 +588,8 @@ struct fuse_fs_context {
         enum fuse_dax_mode dax_mode;
         unsigned int max_read;
         unsigned int blksize;
+        /* Request timeout (in seconds). 0 = no timeout (infinite wait) */
+        unsigned int req_timeout;
         const char *subtype;
 
         /* DAX device, may be NULL */
@@ -923,6 +939,9 @@ struct fuse_conn {
         /** IDR for backing files ids */
         struct idr backing_files_map;
 #endif
+
+        /** Only used if the connection enforces request timeouts */
+        struct fuse_timeout timeout;
 };
 
 /*
@@ -1191,6 +1210,9 @@ void fuse_request_end(struct fuse_req *req);
 void fuse_abort_conn(struct fuse_conn *fc);
 void fuse_wait_aborted(struct fuse_conn *fc);
 
+/* Check if any requests timed out */
+void fuse_check_timeout(struct work_struct *work);
+
 /**
  * Invalidate inode attributes
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3ce4f4e81d09..02dac88d922e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -765,6 +765,7 @@ enum {
         OPT_ALLOW_OTHER,
         OPT_MAX_READ,
         OPT_BLKSIZE,
+        OPT_REQUEST_TIMEOUT,
         OPT_ERR
 };
 
@@ -779,6 +780,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
         fsparam_u32     ("max_read",            OPT_MAX_READ),
         fsparam_u32     ("blksize",             OPT_BLKSIZE),
         fsparam_string  ("subtype",             OPT_SUBTYPE),
+        fsparam_u32     ("request_timeout",     OPT_REQUEST_TIMEOUT),
         {}
 };
 
@@ -874,6 +876,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
                 ctx->blksize = result.uint_32;
                 break;
 
+        case OPT_REQUEST_TIMEOUT:
+                ctx->req_timeout = result.uint_32;
+                break;
+
         default:
                 return -EINVAL;
         }
@@ -1004,6 +1010,8 @@ void fuse_conn_put(struct fuse_conn *fc)
 
                 if (IS_ENABLED(CONFIG_FUSE_DAX))
                         fuse_dax_conn_free(fc);
+                if (fc->timeout.req_timeout)
+                        cancel_delayed_work_sync(&fc->timeout.work);
                 if (fiq->ops->release)
                         fiq->ops->release(fiq);
                 put_pid_ns(fc->pid_ns);
@@ -1723,6 +1731,20 @@ int fuse_init_fs_context_submount(struct fs_context *fsc)
 }
 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
 
+static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx)
+{
+        if (ctx->req_timeout) {
+                if (check_mul_overflow(ctx->req_timeout, HZ, &fc->timeout.req_timeout))
+                        fc->timeout.req_timeout = ULONG_MAX;
+
+                INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
+                queue_delayed_work(system_wq, &fc->timeout.work,
+                                   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
+        } else {
+                fc->timeout.req_timeout = 0;
+        }
+}
+
 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 {
         struct fuse_dev *fud = NULL;
@@ -1785,6 +1807,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
         fc->destroy = ctx->destroy;
         fc->no_control = ctx->no_control;
         fc->no_force_umount = ctx->no_force_umount;
+        fuse_init_fc_timeout(fc, ctx);
 
         err = -ENOMEM;
         root = fuse_get_root_inode(sb, ctx->rootmode);
There are situations where fuse servers can become unresponsive or
stuck, for example if the server is deadlocked. Currently, there's no
good way to detect if a server is stuck and needs to be killed manually.

This commit adds an option for enforcing a timeout (in seconds) for
requests where if the timeout elapses without the server responding to
the request, the connection will be automatically aborted.

Please note that these timeouts are not 100% precise. For example, the
request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
the requested timeout due to internal implementation, in order to
mitigate overhead.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 fs/fuse/dev.c    | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h | 22 +++++++++++++
 fs/fuse/inode.c  | 23 ++++++++++++++
 3 files changed, 128 insertions(+)
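For completeness, the option added by this patch is a plain mount parameter,
so usage would look something like the hypothetical line below. The server
name and mountpoint are placeholders, and this assumes a FUSE server that
passes unrecognized -o options through to the kernel mount:

        ./myfs_server /mnt/myfs -o request_timeout=60

With that, a request the server never answers causes the connection to be
aborted after roughly 60 seconds, plus up to FUSE_TIMEOUT_TIMER_FREQ extra
seconds of timer granularity as noted in the commit message.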