diff mbox series

[4/4] virtiofs: Support blocking posix locks (fcntl(F_SETLKW))

Message ID 20191115205705.2046-5-vgoyal@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs: Add a notification queue | expand

Commit Message

Vivek Goyal Nov. 15, 2019, 8:57 p.m. UTC
As of now we don't support the blocking variant of posix locks and the daemon
returns -EOPNOTSUPP. The reason is that it can lead to deadlocks. Virtqueue
size is limited and it is possible that we fill the virtqueue with
fcntl(F_SETLKW) requests and wait for replies. A subsequent unlock request
then can't make progress because the virtqueue is full. That means F_SETLKW
can't make progress either and we are deadlocked.

Use the notification queue to solve this problem. After a lock request is
submitted, the device will send a reply asking the requester to wait. Once
the lock is available, the requester will get a notification saying the lock
is available. That way we don't keep the request virtqueue busy while we are
waiting for the lock, and further unlock requests can make progress.

When we get a reply in response to a lock request, we need a way to know
whether we need to wait for a notification or not. I have overloaded the
fuse_out_header->error field. If the value of ->error is 1, that's a signal
to the caller to wait for the lock notification.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/virtio_fs.c       | 78 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/fuse.h |  7 ++++
 2 files changed, 84 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi Nov. 21, 2019, 5 p.m. UTC | #1
On Fri, Nov 15, 2019 at 03:57:05PM -0500, Vivek Goyal wrote:
> As of now we don't support blocking variant of posix locks and daemon returns
> -EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is
> limited and it is possible we fill virtqueue with all the requests of
> fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request
> can't make progress because virtqueue is full. And that means F_SETLKW can't
> make progress and we are deadlocked.
> 
> Use notification queue to solve this problem. After submitting lock request
> device will send a reply asking requester to wait. Once lock is available,
> requester will get a notification saying locking is available. That way
> we don't keep the request virtueue busy while we are waiting for lock
> and further unlock requests can make progress.
> 
> When we get a reply in response to lock request, we need a way to know if
> we need to wait for notification or not. I have overloaded the
> fuse_out_header->error field. If value is ->error is 1, that's a signal
> to caller to wait for lock notification.
> 
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/virtio_fs.c       | 78 ++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/fuse.h |  7 ++++
>  2 files changed, 84 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 21d8d9d7d317..8aa9fc996556 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -35,6 +35,7 @@ struct virtio_fs_vq {
>  	struct work_struct done_work;
>  	struct list_head queued_reqs;
>  	struct list_head end_reqs;	/* End these requests */
> +	struct list_head wait_reqs;	/* requests waiting for notification */
>  	struct virtio_fs_notify_node *notify_nodes;
>  	struct list_head notify_reqs;	/* List for queuing notify requests */
>  	struct delayed_work dispatch_work;
> @@ -85,7 +86,6 @@ struct virtio_fs_notify_node {
>  
>  static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);
>  
> -
>  static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
>  {
>  	struct virtio_fs *fs = vq->vdev->priv;
> @@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
>  	return 0;
>  }
>  
> +static int notify_complete_waiting_req(struct virtio_fs *vfs,
> +				       struct fuse_notify_lock_out *out_args)
> +{
> +	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST];
> +	struct fuse_req *req, *next;
> +	bool found = false;
> +	struct fuse_conn *fc = fsvq->fud->fc;
> +
> +	/* Find waiting request with the unique number and end it */
> +	spin_lock(&fsvq->lock);
> +		list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) {
> +			if (req->in.h.unique == out_args->id) {
> +				list_del_init(&req->list);
> +				clear_bit(FR_SENT, &req->flags);
> +				/* Transfer error code from notify */
> +				req->out.h.error = out_args->error;
> +				found = true;
> +				break;
> +			}
> +		}
> +	spin_unlock(&fsvq->lock);
> +
> +	/*
> +	 * TODO: It is possible that some re-ordering happens in notify
> +	 * comes before request is complete. Deal with it.
> +	 */
> +	if (found) {
> +		fuse_request_end(fc, req);
> +		spin_lock(&fsvq->lock);
> +		dec_in_flight_req(fsvq);
> +		spin_unlock(&fsvq->lock);
> +	} else
> +		pr_debug("virtio-fs: Did not find waiting request with"
> +		       " unique=0x%llx\n", out_args->id);
> +
> +	return 0;
> +}
> +
> +static int virtio_fs_handle_notify(struct virtio_fs *vfs,
> +				   struct virtio_fs_notify *notify)
> +{
> +	int ret = 0;
> +	struct fuse_out_header *oh = &notify->out_hdr;
> +	struct fuse_notify_lock_out *lo;
> +
> +	/*
> +	 * For notifications, oh.unique is 0 and oh->error contains code
> +	 * for which notification as arrived.
> +	 */
> +	switch(oh->error) {
> +	case FUSE_NOTIFY_LOCK:
> +		lo = (struct fuse_notify_lock_out *) &notify->outarg;
> +		notify_complete_waiting_req(vfs, lo);
> +		break;
> +	default:
> +		printk("virtio-fs: Unexpected notification %d\n", oh->error);
> +	}
> +	return ret;
> +}

Is this specific to virtio, or can it be handled in common code?

> +
>  static void virtio_fs_notify_done_work(struct work_struct *work)
>  {
>  	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
>  						 done_work);
>  	struct virtqueue *vq = fsvq->vq;
> +	struct virtio_fs *vfs = vq->vdev->priv;
>  	LIST_HEAD(reqs);
>  	struct virtio_fs_notify_node *notify, *next;
> +	struct fuse_out_header *oh;
>  
>  	spin_lock(&fsvq->lock);
>  	do {
> @@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work)
>  
>  	/* Process notify */
>  	list_for_each_entry_safe(notify, next, &reqs, list) {
> +		oh = &notify->notify.out_hdr;
> +		WARN_ON(oh->unique);
> +		/* Handle notification */
> +		virtio_fs_handle_notify(vfs, &notify->notify);
>  		spin_lock(&fsvq->lock);
>  		dec_in_flight_req(fsvq);
>  		list_del_init(&notify->list);
> @@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
>  		 * TODO verify that server properly follows FUSE protocol
>  		 * (oh.uniq, oh.len)
>  		 */
> +		if (req->out.h.error == 1) {
> +			/* Wait for notification to complete request */
> +			list_del_init(&req->list);
> +			spin_lock(&fsvq->lock);
> +			list_add_tail(&req->list, &fsvq->wait_reqs);
> +			spin_unlock(&fsvq->lock);
> +			continue;
> +		}
> +
>  		args = req->args;
>  		copy_args_from_argbuf(args, req);
>  
> @@ -705,6 +780,7 @@ static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
>  	strncpy(fsvq->name, name, VQ_NAME_LEN);
>  	spin_lock_init(&fsvq->lock);
>  	INIT_LIST_HEAD(&fsvq->queued_reqs);
> +	INIT_LIST_HEAD(&fsvq->wait_reqs);
>  	INIT_LIST_HEAD(&fsvq->end_reqs);
>  	INIT_LIST_HEAD(&fsvq->notify_reqs);
>  	init_completion(&fsvq->in_flight_zero);
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 373cada89815..45f0c4efec8e 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -481,6 +481,7 @@ enum fuse_notify_code {
>  	FUSE_NOTIFY_STORE = 4,
>  	FUSE_NOTIFY_RETRIEVE = 5,
>  	FUSE_NOTIFY_DELETE = 6,
> +	FUSE_NOTIFY_LOCK = 7,
>  	FUSE_NOTIFY_CODE_MAX,
>  };
>  
> @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
>  	uint64_t	dummy4;
>  };
>  
> +struct fuse_notify_lock_out {
> +	uint64_t	id;

Please call this field "unique" or "lock_unique" so it's clear this
identifier is the fuse_in_header->unique value of the lock request.

> +	int32_t		error;
> +	int32_t		padding;
> +};
> +
>  /* Device ioctls: */
>  #define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)
>  
> -- 
> 2.20.1
>
Vivek Goyal Nov. 22, 2019, 1 p.m. UTC | #2
On Thu, Nov 21, 2019 at 05:00:20PM +0000, Stefan Hajnoczi wrote:

[..]
> > +static int virtio_fs_handle_notify(struct virtio_fs *vfs,
> > +				   struct virtio_fs_notify *notify)
> > +{
> > +	int ret = 0;
> > +	struct fuse_out_header *oh = &notify->out_hdr;
> > +	struct fuse_notify_lock_out *lo;
> > +
> > +	/*
> > +	 * For notifications, oh.unique is 0 and oh->error contains code
> > +	 * for which notification as arrived.
> > +	 */
> > +	switch(oh->error) {
> > +	case FUSE_NOTIFY_LOCK:
> > +		lo = (struct fuse_notify_lock_out *) &notify->outarg;
> > +		notify_complete_waiting_req(vfs, lo);
> > +		break;
> > +	default:
> > +		printk("virtio-fs: Unexpected notification %d\n", oh->error);
> > +	}
> > +	return ret;
> > +}
> 
> Is this specific to virtio or can be it handled in common code?

This is not specific to virtio_fs. In principle, regular fuse daemon could
implement something similar. Though they might not have to because client
can just block without introducing deadlock possibilities.

Anyway, I will look into moving this code into fuse common.

[..]
> > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> > index 373cada89815..45f0c4efec8e 100644
> > --- a/include/uapi/linux/fuse.h
> > +++ b/include/uapi/linux/fuse.h
> > @@ -481,6 +481,7 @@ enum fuse_notify_code {
> >  	FUSE_NOTIFY_STORE = 4,
> >  	FUSE_NOTIFY_RETRIEVE = 5,
> >  	FUSE_NOTIFY_DELETE = 6,
> > +	FUSE_NOTIFY_LOCK = 7,
> >  	FUSE_NOTIFY_CODE_MAX,
> >  };
> >  
> > @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in {
> >  	uint64_t	dummy4;
> >  };
> >  
> > +struct fuse_notify_lock_out {
> > +	uint64_t	id;
> 
> Please call this field "unique" or "lock_unique" so it's clear this
> identifier is the fuse_header_in->unique value of the lock request.

Ok, will do.

Vivek
diff mbox series

Patch

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 21d8d9d7d317..8aa9fc996556 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -35,6 +35,7 @@  struct virtio_fs_vq {
 	struct work_struct done_work;
 	struct list_head queued_reqs;
 	struct list_head end_reqs;	/* End these requests */
+	struct list_head wait_reqs;	/* requests waiting for notification */
 	struct virtio_fs_notify_node *notify_nodes;
 	struct list_head notify_reqs;	/* List for queuing notify requests */
 	struct delayed_work dispatch_work;
@@ -85,7 +86,6 @@  struct virtio_fs_notify_node {
 
 static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq);
 
-
 static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
 {
 	struct virtio_fs *fs = vq->vdev->priv;
@@ -513,13 +513,75 @@  static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq)
 	return 0;
 }
 
+static int notify_complete_waiting_req(struct virtio_fs *vfs,
+				       struct fuse_notify_lock_out *out_args)
+{
+	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST];
+	struct fuse_req *req, *next;
+	bool found = false;
+	struct fuse_conn *fc = fsvq->fud->fc;
+
+	/* Find waiting request with the unique number and end it */
+	spin_lock(&fsvq->lock);
+		list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) {
+			if (req->in.h.unique == out_args->id) {
+				list_del_init(&req->list);
+				clear_bit(FR_SENT, &req->flags);
+				/* Transfer error code from notify */
+				req->out.h.error = out_args->error;
+				found = true;
+				break;
+			}
+		}
+	spin_unlock(&fsvq->lock);
+
+	/*
+	 * TODO: It is possible that some re-ordering happens and the notify
+	 * comes before the request is complete. Deal with it.
+	 */
+	if (found) {
+		fuse_request_end(fc, req);
+		spin_lock(&fsvq->lock);
+		dec_in_flight_req(fsvq);
+		spin_unlock(&fsvq->lock);
+	} else
+		pr_debug("virtio-fs: Did not find waiting request with"
+		       " unique=0x%llx\n", out_args->id);
+
+	return 0;
+}
+
+static int virtio_fs_handle_notify(struct virtio_fs *vfs,
+				   struct virtio_fs_notify *notify)
+{
+	int ret = 0;
+	struct fuse_out_header *oh = &notify->out_hdr;
+	struct fuse_notify_lock_out *lo;
+
+	/*
+	 * For notifications, oh->unique is 0 and oh->error contains the code
+	 * of the notification that has arrived.
+	 */
+	switch(oh->error) {
+	case FUSE_NOTIFY_LOCK:
+		lo = (struct fuse_notify_lock_out *) &notify->outarg;
+		notify_complete_waiting_req(vfs, lo);
+		break;
+	default:
+		printk("virtio-fs: Unexpected notification %d\n", oh->error);
+	}
+	return ret;
+}
+
 static void virtio_fs_notify_done_work(struct work_struct *work)
 {
 	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
 						 done_work);
 	struct virtqueue *vq = fsvq->vq;
+	struct virtio_fs *vfs = vq->vdev->priv;
 	LIST_HEAD(reqs);
 	struct virtio_fs_notify_node *notify, *next;
+	struct fuse_out_header *oh;
 
 	spin_lock(&fsvq->lock);
 	do {
@@ -535,6 +597,10 @@  static void virtio_fs_notify_done_work(struct work_struct *work)
 
 	/* Process notify */
 	list_for_each_entry_safe(notify, next, &reqs, list) {
+		oh = &notify->notify.out_hdr;
+		WARN_ON(oh->unique);
+		/* Handle notification */
+		virtio_fs_handle_notify(vfs, &notify->notify);
 		spin_lock(&fsvq->lock);
 		dec_in_flight_req(fsvq);
 		list_del_init(&notify->list);
@@ -656,6 +722,15 @@  static void virtio_fs_requests_done_work(struct work_struct *work)
 		 * TODO verify that server properly follows FUSE protocol
 		 * (oh.uniq, oh.len)
 		 */
+		if (req->out.h.error == 1) {
+			/* Wait for notification to complete request */
+			list_del_init(&req->list);
+			spin_lock(&fsvq->lock);
+			list_add_tail(&req->list, &fsvq->wait_reqs);
+			spin_unlock(&fsvq->lock);
+			continue;
+		}
+
 		args = req->args;
 		copy_args_from_argbuf(args, req);
 
@@ -705,6 +780,7 @@  static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq,
 	strncpy(fsvq->name, name, VQ_NAME_LEN);
 	spin_lock_init(&fsvq->lock);
 	INIT_LIST_HEAD(&fsvq->queued_reqs);
+	INIT_LIST_HEAD(&fsvq->wait_reqs);
 	INIT_LIST_HEAD(&fsvq->end_reqs);
 	INIT_LIST_HEAD(&fsvq->notify_reqs);
 	init_completion(&fsvq->in_flight_zero);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 373cada89815..45f0c4efec8e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -481,6 +481,7 @@  enum fuse_notify_code {
 	FUSE_NOTIFY_STORE = 4,
 	FUSE_NOTIFY_RETRIEVE = 5,
 	FUSE_NOTIFY_DELETE = 6,
+	FUSE_NOTIFY_LOCK = 7,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -868,6 +869,12 @@  struct fuse_notify_retrieve_in {
 	uint64_t	dummy4;
 };
 
+struct fuse_notify_lock_out {
+	uint64_t	id;
+	int32_t		error;
+	int32_t		padding;
+};
+
 /* Device ioctls: */
 #define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)