diff mbox series

[v11,1/2] fuse: add kernel-enforced timeout option for requests

Message ID 20241218222630.99920-2-joannelkoong@gmail.com (mailing list archive)
State New
Headers show
Series fuse: add kernel-enforced request timeout option | expand

Commit Message

Joanne Koong Dec. 18, 2024, 10:26 p.m. UTC
There are situations where fuse servers can become unresponsive or
stuck, for example if the server is deadlocked. Currently, there's no
good way to detect if a server is stuck and needs to be killed manually.

This commit adds an option for enforcing a timeout (in seconds) for
requests where if the timeout elapses without the server responding to
the request, the connection will be automatically aborted.

Please note that these timeouts are not 100% precise. For example, the
request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
the requested timeout due to internal implementation, in order to
mitigate overhead.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 fs/fuse/dev.c    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/fuse/fuse_i.h | 22 +++++++++++++
 fs/fuse/inode.c  | 23 +++++++++++++
 3 files changed, 130 insertions(+)

Comments

Jeff Layton Dec. 19, 2024, 12:19 p.m. UTC | #1
On Wed, 2024-12-18 at 14:26 -0800, Joanne Koong wrote:
> There are situations where fuse servers can become unresponsive or
> stuck, for example if the server is deadlocked. Currently, there's no
> good way to detect if a server is stuck and needs to be killed manually.
> 
> This commit adds an option for enforcing a timeout (in seconds) for
> requests where if the timeout elapses without the server responding to
> the request, the connection will be automatically aborted.
> 
> Please note that these timeouts are not 100% precise. For example, the
> request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
> the requested timeout due to internal implementation, in order to
> mitigate overhead.
> 
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  fs/fuse/dev.c    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/fuse_i.h | 22 +++++++++++++
>  fs/fuse/inode.c  | 23 +++++++++++++
>  3 files changed, 130 insertions(+)
> 
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 27ccae63495d..bcf8a7994944 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -45,6 +45,87 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
>  	return READ_ONCE(file->private_data);
>  }
>  
> +static bool request_expired(struct fuse_conn *fc, struct list_head *list)
> +{
> +	struct fuse_req *req;
> +
> +	req = list_first_entry_or_null(list, struct fuse_req, list);
> +	if (!req)
> +		return false;
> +	return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
> +}

Shouldn't that be time_is_after_jiffies() ? This is going to return
true when you've not yet hit the timeout, no?

> +
> +/*
> + * Check if any requests aren't being completed by the time the request timeout
> + * elapses. To do so, we:
> + * - check the fiq pending list
> + * - check the bg queue
> + * - check the fpq io and processing lists
> + *
> + * To make this fast, we only check against the head request on each list since
> + * these are generally queued in order of creation time (eg newer requests get
> + * queued to the tail). We might miss a few edge cases (eg requests transitioning
> + * between lists, re-sent requests at the head of the pending list having a
> + * later creation time than other requests on that list, etc.) but that is fine
> + * since if the request never gets fulfilled, it will eventually be caught.
> + */
> +void fuse_check_timeout(struct work_struct *work)
> +{
> +	struct delayed_work *dwork = to_delayed_work(work);
> +	struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
> +					    timeout.work);
> +	struct fuse_iqueue *fiq = &fc->iq;
> +	struct fuse_dev *fud;
> +	struct fuse_pqueue *fpq;
> +	bool expired = false;
> +	int i;
> +
> +	if (!atomic_read(&fc->num_waiting))
> +	    goto out;
> +

This is fine for an initial pass, but it might be more interesting to
not queue the workqueue job at all when there is nothing in flight.

> +	spin_lock(&fiq->lock);
> +	expired = request_expired(fc, &fiq->pending);
> +	spin_unlock(&fiq->lock);
> +	if (expired)
> +		goto abort_conn;
> +
> +	spin_lock(&fc->bg_lock);
> +	expired = request_expired(fc, &fc->bg_queue);
> +	spin_unlock(&fc->bg_lock);
> +	if (expired)
> +		goto abort_conn;
> +
> +	spin_lock(&fc->lock);
> +	if (!fc->connected) {
> +		spin_unlock(&fc->lock);
> +		return;
> +	}
> +	list_for_each_entry(fud, &fc->devices, entry) {
> +		fpq = &fud->pq;
> +		spin_lock(&fpq->lock);
> +		if (request_expired(fc, &fpq->io))
> +			goto fpq_abort;
> +
> +		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
> +			if (request_expired(fc, &fpq->processing[i]))
> +				goto fpq_abort;
> +		}
> +		spin_unlock(&fpq->lock);
> +	}
> +	spin_unlock(&fc->lock);
> +
> +out:
> +	queue_delayed_work(system_wq, &fc->timeout.work,
> +			   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
> +	return;
> +
> +fpq_abort:
> +	spin_unlock(&fpq->lock);
> +	spin_unlock(&fc->lock);
> +abort_conn:
> +	fuse_abort_conn(fc);
> +}
> +
>  static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>  {
>  	INIT_LIST_HEAD(&req->list);
> @@ -53,6 +134,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>  	refcount_set(&req->count, 1);
>  	__set_bit(FR_PENDING, &req->flags);
>  	req->fm = fm;
> +	req->create_time = jiffies;
>  }
>  
>  static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
> @@ -2260,6 +2342,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
>  		LIST_HEAD(to_end);
>  		unsigned int i;
>  
> +		if (fc->timeout.req_timeout)
> +			cancel_delayed_work(&fc->timeout.work);
> +
>  		/* Background queuing checks fc->connected under bg_lock */
>  		spin_lock(&fc->bg_lock);
>  		fc->connected = 0;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 74744c6f2860..26eb00e5f043 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -438,6 +438,9 @@ struct fuse_req {
>  
>  	/** fuse_mount this request belongs to */
>  	struct fuse_mount *fm;
> +
> +	/** When (in jiffies) the request was created */
> +	unsigned long create_time;
>  };
>  
>  struct fuse_iqueue;
> @@ -528,6 +531,17 @@ struct fuse_pqueue {
>  	struct list_head io;
>  };
>  
> +/* Frequency (in seconds) of request timeout checks, if opted into */
> +#define FUSE_TIMEOUT_TIMER_FREQ 15
> +
> +struct fuse_timeout {
> +	/* Worker for checking if any requests have timed out */
> +	struct delayed_work work;
> +
> +	/* Request timeout (in jiffies). 0 = no timeout */
> +	unsigned long req_timeout;
> +};
> +
>  /**
>   * Fuse device instance
>   */
> @@ -574,6 +588,8 @@ struct fuse_fs_context {
>  	enum fuse_dax_mode dax_mode;
>  	unsigned int max_read;
>  	unsigned int blksize;
> +	/*  Request timeout (in seconds). 0 = no timeout (infinite wait) */
> +	unsigned int req_timeout;
>  	const char *subtype;
>  
>  	/* DAX device, may be NULL */
> @@ -923,6 +939,9 @@ struct fuse_conn {
>  	/** IDR for backing files ids */
>  	struct idr backing_files_map;
>  #endif
> +
> +	/** Only used if the connection enforces request timeouts */
> +	struct fuse_timeout timeout;
>  };
>  
>  /*
> @@ -1191,6 +1210,9 @@ void fuse_request_end(struct fuse_req *req);
>  void fuse_abort_conn(struct fuse_conn *fc);
>  void fuse_wait_aborted(struct fuse_conn *fc);
>  
> +/* Check if any requests timed out */
> +void fuse_check_timeout(struct work_struct *work);
> +
>  /**
>   * Invalidate inode attributes
>   */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 3ce4f4e81d09..02dac88d922e 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -765,6 +765,7 @@ enum {
>  	OPT_ALLOW_OTHER,
>  	OPT_MAX_READ,
>  	OPT_BLKSIZE,
> +	OPT_REQUEST_TIMEOUT,
>  	OPT_ERR
>  };
>  
> @@ -779,6 +780,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
>  	fsparam_u32	("max_read",		OPT_MAX_READ),
>  	fsparam_u32	("blksize",		OPT_BLKSIZE),
>  	fsparam_string	("subtype",		OPT_SUBTYPE),
> +	fsparam_u32	("request_timeout",	OPT_REQUEST_TIMEOUT),
>  	{}
>  };
>  
> @@ -874,6 +876,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
>  		ctx->blksize = result.uint_32;
>  		break;
>  
> +	case OPT_REQUEST_TIMEOUT:
> +		ctx->req_timeout = result.uint_32;
> +		break;
> +
>  	default:
>  		return -EINVAL;
>  	}
> @@ -1004,6 +1010,8 @@ void fuse_conn_put(struct fuse_conn *fc)
>  
>  		if (IS_ENABLED(CONFIG_FUSE_DAX))
>  			fuse_dax_conn_free(fc);
> +		if (fc->timeout.req_timeout)
> +			cancel_delayed_work_sync(&fc->timeout.work);
>  		if (fiq->ops->release)
>  			fiq->ops->release(fiq);
>  		put_pid_ns(fc->pid_ns);
> @@ -1723,6 +1731,20 @@ int fuse_init_fs_context_submount(struct fs_context *fsc)
>  }
>  EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
>  
> +static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx)
> +{
> +	if (ctx->req_timeout) {
> +		if (check_mul_overflow(ctx->req_timeout, HZ, &fc->timeout.req_timeout))
> +			fc->timeout.req_timeout = ULONG_MAX;
> +
> +		INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
> +		queue_delayed_work(system_wq, &fc->timeout.work,
> +				   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
> +	} else {
> +		fc->timeout.req_timeout = 0;
> +	}
> +}
> +
>  int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  {
>  	struct fuse_dev *fud = NULL;
> @@ -1785,6 +1807,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  	fc->destroy = ctx->destroy;
>  	fc->no_control = ctx->no_control;
>  	fc->no_force_umount = ctx->no_force_umount;
> +	fuse_init_fc_timeout(fc, ctx);
>  
>  	err = -ENOMEM;
>  	root = fuse_get_root_inode(sb, ctx->rootmode);
Jeff Layton Dec. 19, 2024, 2:06 p.m. UTC | #2
On Thu, 2024-12-19 at 07:19 -0500, Jeff Layton wrote:
> On Wed, 2024-12-18 at 14:26 -0800, Joanne Koong wrote:
> > There are situations where fuse servers can become unresponsive or
> > stuck, for example if the server is deadlocked. Currently, there's no
> > good way to detect if a server is stuck and needs to be killed manually.
> > 
> > This commit adds an option for enforcing a timeout (in seconds) for
> > requests where if the timeout elapses without the server responding to
> > the request, the connection will be automatically aborted.
> > 
> > Please note that these timeouts are not 100% precise. For example, the
> > request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
> > the requested timeout due to internal implementation, in order to
> > mitigate overhead.
> > 
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  fs/fuse/dev.c    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  fs/fuse/fuse_i.h | 22 +++++++++++++
> >  fs/fuse/inode.c  | 23 +++++++++++++
> >  3 files changed, 130 insertions(+)
> > 
> > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > index 27ccae63495d..bcf8a7994944 100644
> > --- a/fs/fuse/dev.c
> > +++ b/fs/fuse/dev.c
> > @@ -45,6 +45,87 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
> >  	return READ_ONCE(file->private_data);
> >  }
> >  
> > +static bool request_expired(struct fuse_conn *fc, struct list_head *list)
> > +{
> > +	struct fuse_req *req;
> > +
> > +	req = list_first_entry_or_null(list, struct fuse_req, list);
> > +	if (!req)
> > +		return false;
> > +	return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
> > +}
> 
> Shouldn't that be time_is_after_jiffies() ? This is going to return
> true when you've not yet hit the timeout, no?
> 

My mistake, I misinterpreted the time_is_before_jiffies() macro. You
can add:

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Etienne Martineau Dec. 19, 2024, 8:44 p.m. UTC | #3
On Wed, Dec 18, 2024 at 5:27 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> There are situations where fuse servers can become unresponsive or
> stuck, for example if the server is deadlocked. Currently, there's no
> good way to detect if a server is stuck and needs to be killed manually.
>
> This commit adds an option for enforcing a timeout (in seconds) for
> requests where if the timeout elapses without the server responding to
> the request, the connection will be automatically aborted.
>
> Please note that these timeouts are not 100% precise. For example, the
> request may take roughly an extra FUSE_TIMEOUT_TIMER_FREQ seconds beyond
> the requested timeout due to internal implementation, in order to
> mitigate overhead.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>

This solution seems to be holding fine with my TC. However, personally
as a non-fuse person, I feel that it's hard to judge if the check
timeout logic is doing the right thing in every scenario and more
specifically for my use-case where I'm going to backport that logic to
older stable branches.

So for that reason I've been trying to figure out a simpler solution
and I stumbled on something last night which is giving good results so
far. Just a basic time distribution sliding window. Very
straightforward, no list, no timer scale issue and low overhead.
Sending that patch as a RFC to see if we can leverage that trick
maybe... OR something along those lines?

https://lore.kernel.org/linux-fsdevel/20241219204149.11958-2-etmartin4313@gmail.com/T/#mcfa362bf41860e151177a2aa49eee8a141324477

PS I don't want to put a monkey wrench into this series and again this
solution is holding fine for me. I'm just looking for something more
straightforward if possible.

thanks,
Etienne



>  fs/fuse/dev.c    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/fuse/fuse_i.h | 22 +++++++++++++
>  fs/fuse/inode.c  | 23 +++++++++++++
>  3 files changed, 130 insertions(+)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 27ccae63495d..bcf8a7994944 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -45,6 +45,87 @@ static struct fuse_dev *fuse_get_dev(struct file *file)
>         return READ_ONCE(file->private_data);
>  }
>
> +static bool request_expired(struct fuse_conn *fc, struct list_head *list)
> +{
> +       struct fuse_req *req;
> +
> +       req = list_first_entry_or_null(list, struct fuse_req, list);
> +       if (!req)
> +               return false;
> +       return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
> +}
> +
> +/*
> + * Check if any requests aren't being completed by the time the request timeout
> + * elapses. To do so, we:
> + * - check the fiq pending list
> + * - check the bg queue
> + * - check the fpq io and processing lists
> + *
> + * To make this fast, we only check against the head request on each list since
> + * these are generally queued in order of creation time (eg newer requests get
> + * queued to the tail). We might miss a few edge cases (eg requests transitioning
> + * between lists, re-sent requests at the head of the pending list having a
> + * later creation time than other requests on that list, etc.) but that is fine
> + * since if the request never gets fulfilled, it will eventually be caught.
> + */
> +void fuse_check_timeout(struct work_struct *work)
> +{
> +       struct delayed_work *dwork = to_delayed_work(work);
> +       struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
> +                                           timeout.work);
> +       struct fuse_iqueue *fiq = &fc->iq;
> +       struct fuse_dev *fud;
> +       struct fuse_pqueue *fpq;
> +       bool expired = false;
> +       int i;
> +
> +       if (!atomic_read(&fc->num_waiting))
> +           goto out;
> +
> +       spin_lock(&fiq->lock);
> +       expired = request_expired(fc, &fiq->pending);
> +       spin_unlock(&fiq->lock);
> +       if (expired)
> +               goto abort_conn;
> +
> +       spin_lock(&fc->bg_lock);
> +       expired = request_expired(fc, &fc->bg_queue);
> +       spin_unlock(&fc->bg_lock);
> +       if (expired)
> +               goto abort_conn;
> +
> +       spin_lock(&fc->lock);
> +       if (!fc->connected) {
> +               spin_unlock(&fc->lock);
> +               return;
> +       }
> +       list_for_each_entry(fud, &fc->devices, entry) {
> +               fpq = &fud->pq;
> +               spin_lock(&fpq->lock);
> +               if (request_expired(fc, &fpq->io))
> +                       goto fpq_abort;
> +
> +               for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
> +                       if (request_expired(fc, &fpq->processing[i]))
> +                               goto fpq_abort;
> +               }
> +               spin_unlock(&fpq->lock);
> +       }
> +       spin_unlock(&fc->lock);
> +
> +out:
> +       queue_delayed_work(system_wq, &fc->timeout.work,
> +                          secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
> +       return;
> +
> +fpq_abort:
> +       spin_unlock(&fpq->lock);
> +       spin_unlock(&fc->lock);
> +abort_conn:
> +       fuse_abort_conn(fc);
> +}
> +
>  static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>  {
>         INIT_LIST_HEAD(&req->list);
> @@ -53,6 +134,7 @@ static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
>         refcount_set(&req->count, 1);
>         __set_bit(FR_PENDING, &req->flags);
>         req->fm = fm;
> +       req->create_time = jiffies;
>  }
>
>  static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
> @@ -2260,6 +2342,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
>                 LIST_HEAD(to_end);
>                 unsigned int i;
>
> +               if (fc->timeout.req_timeout)
> +                       cancel_delayed_work(&fc->timeout.work);
> +
>                 /* Background queuing checks fc->connected under bg_lock */
>                 spin_lock(&fc->bg_lock);
>                 fc->connected = 0;
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 74744c6f2860..26eb00e5f043 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -438,6 +438,9 @@ struct fuse_req {
>
>         /** fuse_mount this request belongs to */
>         struct fuse_mount *fm;
> +
> +       /** When (in jiffies) the request was created */
> +       unsigned long create_time;
>  };
>
>  struct fuse_iqueue;
> @@ -528,6 +531,17 @@ struct fuse_pqueue {
>         struct list_head io;
>  };
>
> +/* Frequency (in seconds) of request timeout checks, if opted into */
> +#define FUSE_TIMEOUT_TIMER_FREQ 15
> +
> +struct fuse_timeout {
> +       /* Worker for checking if any requests have timed out */
> +       struct delayed_work work;
> +
> +       /* Request timeout (in jiffies). 0 = no timeout */
> +       unsigned long req_timeout;
> +};
> +
>  /**
>   * Fuse device instance
>   */
> @@ -574,6 +588,8 @@ struct fuse_fs_context {
>         enum fuse_dax_mode dax_mode;
>         unsigned int max_read;
>         unsigned int blksize;
> +       /*  Request timeout (in seconds). 0 = no timeout (infinite wait) */
> +       unsigned int req_timeout;
>         const char *subtype;
>
>         /* DAX device, may be NULL */
> @@ -923,6 +939,9 @@ struct fuse_conn {
>         /** IDR for backing files ids */
>         struct idr backing_files_map;
>  #endif
> +
> +       /** Only used if the connection enforces request timeouts */
> +       struct fuse_timeout timeout;
>  };
>
>  /*
> @@ -1191,6 +1210,9 @@ void fuse_request_end(struct fuse_req *req);
>  void fuse_abort_conn(struct fuse_conn *fc);
>  void fuse_wait_aborted(struct fuse_conn *fc);
>
> +/* Check if any requests timed out */
> +void fuse_check_timeout(struct work_struct *work);
> +
>  /**
>   * Invalidate inode attributes
>   */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 3ce4f4e81d09..02dac88d922e 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -765,6 +765,7 @@ enum {
>         OPT_ALLOW_OTHER,
>         OPT_MAX_READ,
>         OPT_BLKSIZE,
> +       OPT_REQUEST_TIMEOUT,
>         OPT_ERR
>  };
>
> @@ -779,6 +780,7 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
>         fsparam_u32     ("max_read",            OPT_MAX_READ),
>         fsparam_u32     ("blksize",             OPT_BLKSIZE),
>         fsparam_string  ("subtype",             OPT_SUBTYPE),
> +       fsparam_u32     ("request_timeout",     OPT_REQUEST_TIMEOUT),
>         {}
>  };
>
> @@ -874,6 +876,10 @@ static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
>                 ctx->blksize = result.uint_32;
>                 break;
>
> +       case OPT_REQUEST_TIMEOUT:
> +               ctx->req_timeout = result.uint_32;
> +               break;
> +
>         default:
>                 return -EINVAL;
>         }
> @@ -1004,6 +1010,8 @@ void fuse_conn_put(struct fuse_conn *fc)
>
>                 if (IS_ENABLED(CONFIG_FUSE_DAX))
>                         fuse_dax_conn_free(fc);
> +               if (fc->timeout.req_timeout)
> +                       cancel_delayed_work_sync(&fc->timeout.work);
>                 if (fiq->ops->release)
>                         fiq->ops->release(fiq);
>                 put_pid_ns(fc->pid_ns);
> @@ -1723,6 +1731,20 @@ int fuse_init_fs_context_submount(struct fs_context *fsc)
>  }
>  EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
>
> +static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx)
> +{
> +       if (ctx->req_timeout) {
> +               if (check_mul_overflow(ctx->req_timeout, HZ, &fc->timeout.req_timeout))
> +                       fc->timeout.req_timeout = ULONG_MAX;
> +
> +               INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
> +               queue_delayed_work(system_wq, &fc->timeout.work,
> +                                  secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
> +       } else {
> +               fc->timeout.req_timeout = 0;
> +       }
> +}
> +
>  int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>  {
>         struct fuse_dev *fud = NULL;
> @@ -1785,6 +1807,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
>         fc->destroy = ctx->destroy;
>         fc->no_control = ctx->no_control;
>         fc->no_force_umount = ctx->no_force_umount;
> +       fuse_init_fc_timeout(fc, ctx);
>
>         err = -ENOMEM;
>         root = fuse_get_root_inode(sb, ctx->rootmode);
> --
> 2.43.5
>
diff mbox series

Patch

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 27ccae63495d..bcf8a7994944 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -45,6 +45,87 @@  static struct fuse_dev *fuse_get_dev(struct file *file)
 	return READ_ONCE(file->private_data);
 }
 
+static bool request_expired(struct fuse_conn *fc, struct list_head *list)
+{
+	struct fuse_req *req;
+
+	req = list_first_entry_or_null(list, struct fuse_req, list);
+	if (!req)
+		return false;
+	return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout);
+}
+
+/*
+ * Check if any requests aren't being completed by the time the request timeout
+ * elapses. To do so, we:
+ * - check the fiq pending list
+ * - check the bg queue
+ * - check the fpq io and processing lists
+ *
+ * To make this fast, we only check against the head request on each list since
+ * these are generally queued in order of creation time (eg newer requests get
+ * queued to the tail). We might miss a few edge cases (eg requests transitioning
+ * between lists, re-sent requests at the head of the pending list having a
+ * later creation time than other requests on that list, etc.) but that is fine
+ * since if the request never gets fulfilled, it will eventually be caught.
+ */
+void fuse_check_timeout(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct fuse_conn *fc = container_of(dwork, struct fuse_conn,
+					    timeout.work);
+	struct fuse_iqueue *fiq = &fc->iq;
+	struct fuse_dev *fud;
+	struct fuse_pqueue *fpq;
+	bool expired = false;
+	int i;
+
+	if (!atomic_read(&fc->num_waiting))
+	    goto out;
+
+	spin_lock(&fiq->lock);
+	expired = request_expired(fc, &fiq->pending);
+	spin_unlock(&fiq->lock);
+	if (expired)
+		goto abort_conn;
+
+	spin_lock(&fc->bg_lock);
+	expired = request_expired(fc, &fc->bg_queue);
+	spin_unlock(&fc->bg_lock);
+	if (expired)
+		goto abort_conn;
+
+	spin_lock(&fc->lock);
+	if (!fc->connected) {
+		spin_unlock(&fc->lock);
+		return;
+	}
+	list_for_each_entry(fud, &fc->devices, entry) {
+		fpq = &fud->pq;
+		spin_lock(&fpq->lock);
+		if (request_expired(fc, &fpq->io))
+			goto fpq_abort;
+
+		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) {
+			if (request_expired(fc, &fpq->processing[i]))
+				goto fpq_abort;
+		}
+		spin_unlock(&fpq->lock);
+	}
+	spin_unlock(&fc->lock);
+
+out:
+	queue_delayed_work(system_wq, &fc->timeout.work,
+			   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
+	return;
+
+fpq_abort:
+	spin_unlock(&fpq->lock);
+	spin_unlock(&fc->lock);
+abort_conn:
+	fuse_abort_conn(fc);
+}
+
 static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 {
 	INIT_LIST_HEAD(&req->list);
@@ -53,6 +134,7 @@  static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
 	refcount_set(&req->count, 1);
 	__set_bit(FR_PENDING, &req->flags);
 	req->fm = fm;
+	req->create_time = jiffies;
 }
 
 static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
@@ -2260,6 +2342,9 @@  void fuse_abort_conn(struct fuse_conn *fc)
 		LIST_HEAD(to_end);
 		unsigned int i;
 
+		if (fc->timeout.req_timeout)
+			cancel_delayed_work(&fc->timeout.work);
+
 		/* Background queuing checks fc->connected under bg_lock */
 		spin_lock(&fc->bg_lock);
 		fc->connected = 0;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 74744c6f2860..26eb00e5f043 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -438,6 +438,9 @@  struct fuse_req {
 
 	/** fuse_mount this request belongs to */
 	struct fuse_mount *fm;
+
+	/** When (in jiffies) the request was created */
+	unsigned long create_time;
 };
 
 struct fuse_iqueue;
@@ -528,6 +531,17 @@  struct fuse_pqueue {
 	struct list_head io;
 };
 
+/* Frequency (in seconds) of request timeout checks, if opted into */
+#define FUSE_TIMEOUT_TIMER_FREQ 15
+
+struct fuse_timeout {
+	/* Worker for checking if any requests have timed out */
+	struct delayed_work work;
+
+	/* Request timeout (in jiffies). 0 = no timeout */
+	unsigned long req_timeout;
+};
+
 /**
  * Fuse device instance
  */
@@ -574,6 +588,8 @@  struct fuse_fs_context {
 	enum fuse_dax_mode dax_mode;
 	unsigned int max_read;
 	unsigned int blksize;
+	/*  Request timeout (in seconds). 0 = no timeout (infinite wait) */
+	unsigned int req_timeout;
 	const char *subtype;
 
 	/* DAX device, may be NULL */
@@ -923,6 +939,9 @@  struct fuse_conn {
 	/** IDR for backing files ids */
 	struct idr backing_files_map;
 #endif
+
+	/** Only used if the connection enforces request timeouts */
+	struct fuse_timeout timeout;
 };
 
 /*
@@ -1191,6 +1210,9 @@  void fuse_request_end(struct fuse_req *req);
 void fuse_abort_conn(struct fuse_conn *fc);
 void fuse_wait_aborted(struct fuse_conn *fc);
 
+/* Check if any requests timed out */
+void fuse_check_timeout(struct work_struct *work);
+
 /**
  * Invalidate inode attributes
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3ce4f4e81d09..02dac88d922e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -765,6 +765,7 @@  enum {
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
 	OPT_BLKSIZE,
+	OPT_REQUEST_TIMEOUT,
 	OPT_ERR
 };
 
@@ -779,6 +780,7 @@  static const struct fs_parameter_spec fuse_fs_parameters[] = {
 	fsparam_u32	("max_read",		OPT_MAX_READ),
 	fsparam_u32	("blksize",		OPT_BLKSIZE),
 	fsparam_string	("subtype",		OPT_SUBTYPE),
+	fsparam_u32	("request_timeout",	OPT_REQUEST_TIMEOUT),
 	{}
 };
 
@@ -874,6 +876,10 @@  static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
 		ctx->blksize = result.uint_32;
 		break;
 
+	case OPT_REQUEST_TIMEOUT:
+		ctx->req_timeout = result.uint_32;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -1004,6 +1010,8 @@  void fuse_conn_put(struct fuse_conn *fc)
 
 		if (IS_ENABLED(CONFIG_FUSE_DAX))
 			fuse_dax_conn_free(fc);
+		if (fc->timeout.req_timeout)
+			cancel_delayed_work_sync(&fc->timeout.work);
 		if (fiq->ops->release)
 			fiq->ops->release(fiq);
 		put_pid_ns(fc->pid_ns);
@@ -1723,6 +1731,20 @@  int fuse_init_fs_context_submount(struct fs_context *fsc)
 }
 EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);
 
+static void fuse_init_fc_timeout(struct fuse_conn *fc, struct fuse_fs_context *ctx)
+{
+	if (ctx->req_timeout) {
+		if (check_mul_overflow(ctx->req_timeout, HZ, &fc->timeout.req_timeout))
+			fc->timeout.req_timeout = ULONG_MAX;
+
+		INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout);
+		queue_delayed_work(system_wq, &fc->timeout.work,
+				   secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ));
+	} else {
+		fc->timeout.req_timeout = 0;
+	}
+}
+
 int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 {
 	struct fuse_dev *fud = NULL;
@@ -1785,6 +1807,7 @@  int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
 	fc->destroy = ctx->destroy;
 	fc->no_control = ctx->no_control;
 	fc->no_force_umount = ctx->no_force_umount;
+	fuse_init_fc_timeout(fc, ctx);
 
 	err = -ENOMEM;
 	root = fuse_get_root_inode(sb, ctx->rootmode);