diff mbox

[v4,12/13] xen/pvcalls: implement release command

Message ID 1505516440-11111-12-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini Sept. 15, 2017, 11 p.m. UTC
Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
in_mutex and out_mutex to avoid concurrent accesses. Then, free the
socket.

For passive sockets, check whether we have already pre-allocated an
active socket for the purpose of being accepted. If so, free that as
well.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 104 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   1 +
 2 files changed, 105 insertions(+)

Comments

Boris Ostrovsky Sept. 22, 2017, 10:43 p.m. UTC | #1
>  
> +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
> +				   struct sock_mapping *map)

I just noticed: pvcalls_front_free_map() is referenced by patches 2 and 8.

> +{
> +	int i;
> +
> +	unbind_from_irqhandler(map->active.irq, map);
> +
> +	spin_lock(&bedata->socket_lock);
> +	if (!list_empty(&map->list))
> +		list_del_init(&map->list);
> +	spin_unlock(&bedata->socket_lock);
> +
> +	for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
> +		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
> +	gnttab_end_foreign_access(map->active.ref, 0, 0);
> +	free_page((unsigned long)map->active.ring);
> +}
> +
>  int pvcalls_front_socket(struct socket *sock)
>  {
>  	struct pvcalls_bedata *bedata;
> @@ -960,6 +978,92 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
>  	return ret;
>  }
>  
> +int pvcalls_front_release(struct socket *sock)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	int req_id, notify, ret;
> +	struct xen_pvcalls_request *req;
> +
> +	pvcalls_enter;
> +	if (!pvcalls_front_dev) {
> +		pvcalls_exit;
> +		return -EIO;
> +	}
> +	if (sock->sk == NULL) {
> +		pvcalls_exit;
> +		return 0;
> +	}
> +
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) sock->sk->sk_send_head;
> +	if (map == NULL) {
> +		pvcalls_exit;
> +		return 0;
> +	}
> +
> +	spin_lock(&bedata->socket_lock);
> +	ret = get_request(bedata, &req_id);
> +	if (ret < 0) {
> +		spin_unlock(&bedata->socket_lock);
> +		pvcalls_exit;
> +		return ret;
> +	}
> +	sock->sk->sk_send_head = NULL;
> +
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_RELEASE;
> +	req->u.release.id = (uint64_t)map;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->socket_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	if (map->active_socket) {
> +		/*
> +		 * Set in_error and wake up inflight_conn_req to force
> +		 * recvmsg waiters to exit.
> +		 */
> +		map->active.ring->in_error = -EBADF;
> +		wake_up_interruptible(&map->active.inflight_conn_req);
> +
> +		/*
> +		 * Wait until there are no more waiters on the mutexes.
> +		 * We know that no new waiters can be added because sk_send_head
> +		 * is set to NULL -- we only need to wait for the existing
> +		 * waiters to return.
> +		 */
> +		while (!mutex_trylock(&map->active.in_mutex) ||
> +			   !mutex_trylock(&map->active.out_mutex))
> +			cpu_relax();


What if you manage to grab the locks before waiters get to run? for
example, in recvmsg:

	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
		wait_event_interruptible(map->active.inflight_conn_req,
					 pvcalls_front_read_todo(map));
	}
	ret = __read_ring(map->active.ring, &map->active.data,
			  &msg->msg_iter, len, flags);

map will be freed (by pvcalls_front_free_map() below) before __read_ring
is passed the just-freed ring.


> +
> +		pvcalls_front_free_map(bedata, map);
> +		kfree(map);


-boris
Boris Ostrovsky Sept. 22, 2017, 10:48 p.m. UTC | #2
>> +		 */
>> +		map->active.ring->in_error = -EBADF;
>> +		wake_up_interruptible(&map->active.inflight_conn_req);
>> +
>> +		/*
>> +		 * Wait until there are no more waiters on the mutexes.
>> +		 * We know that no new waiters can be added because sk_send_head
>> +		 * is set to NULL -- we only need to wait for the existing
>> +		 * waiters to return.
>> +		 */
>> +		while (!mutex_trylock(&map->active.in_mutex) ||
>> +			   !mutex_trylock(&map->active.out_mutex))
>> +			cpu_relax();
>
> What if you manage to grab the locks before waiters get to run? for
> example, in recvmsg:
>
> 	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> 		wait_event_interruptible(map->active.inflight_conn_req,
> 					 pvcalls_front_read_todo(map));
> 	}
> 	ret = __read_ring(map->active.ring, &map->active.data,
> 			  &msg->msg_iter, len, flags);
>
> map will be freed (by pvcalls_front_free_map() below) before __read_ring
> is passed the just-freed ring.

Actually, since you don't drop the locks I am not sure recvmsg side will
even get there.

-boris

>
>
>> +
>> +		pvcalls_front_free_map(bedata, map);
>> +		kfree(map);
>
> -boris
>
>
Stefano Stabellini Oct. 6, 2017, 11:49 p.m. UTC | #3
On Fri, 22 Sep 2017, Boris Ostrovsky wrote:
> > +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
> > +				   struct sock_mapping *map)
> 
> I just noticed: pvcalls_front_free_map() is referenced by patches 2 and 8.

I'll create an empty stub earlier.


> > +{
> > +	int i;
> > +
> > +	unbind_from_irqhandler(map->active.irq, map);
> > +
> > +	spin_lock(&bedata->socket_lock);
> > +	if (!list_empty(&map->list))
> > +		list_del_init(&map->list);
> > +	spin_unlock(&bedata->socket_lock);
> > +
> > +	for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
> > +		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
> > +	gnttab_end_foreign_access(map->active.ref, 0, 0);
> > +	free_page((unsigned long)map->active.ring);
> > +}
> > +
> >  int pvcalls_front_socket(struct socket *sock)
> >  {
> >  	struct pvcalls_bedata *bedata;
> > @@ -960,6 +978,92 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> >  	return ret;
> >  }
> >  
> > +int pvcalls_front_release(struct socket *sock)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	int req_id, notify, ret;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	pvcalls_enter;
> > +	if (!pvcalls_front_dev) {
> > +		pvcalls_exit;
> > +		return -EIO;
> > +	}
> > +	if (sock->sk == NULL) {
> > +		pvcalls_exit;
> > +		return 0;
> > +	}
> > +
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) sock->sk->sk_send_head;
> > +	if (map == NULL) {
> > +		pvcalls_exit;
> > +		return 0;
> > +	}
> > +
> > +	spin_lock(&bedata->socket_lock);
> > +	ret = get_request(bedata, &req_id);
> > +	if (ret < 0) {
> > +		spin_unlock(&bedata->socket_lock);
> > +		pvcalls_exit;
> > +		return ret;
> > +	}
> > +	sock->sk->sk_send_head = NULL;
> > +
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_RELEASE;
> > +	req->u.release.id = (uint64_t)map;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->socket_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	if (map->active_socket) {
> > +		/*
> > +		 * Set in_error and wake up inflight_conn_req to force
> > +		 * recvmsg waiters to exit.
> > +		 */
> > +		map->active.ring->in_error = -EBADF;
> > +		wake_up_interruptible(&map->active.inflight_conn_req);
> > +
> > +		/*
> > +		 * Wait until there are no more waiters on the mutexes.
> > +		 * We know that no new waiters can be added because sk_send_head
> > +		 * is set to NULL -- we only need to wait for the existing
> > +		 * waiters to return.
> > +		 */
> > +		while (!mutex_trylock(&map->active.in_mutex) ||
> > +			   !mutex_trylock(&map->active.out_mutex))
> > +			cpu_relax();
> 
> 
> What if you manage to grab the locks before waiters get to run? for
> example, in recvmsg:
> 
> 	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> 		wait_event_interruptible(map->active.inflight_conn_req,
> 					 pvcalls_front_read_todo(map));
> 	}
> 	ret = __read_ring(map->active.ring, &map->active.data,
> 			  &msg->msg_iter, len, flags);
> 
> map will be freed (by pvcalls_front_free_map() below) before __read_ring
> is passed the just-freed ring.

I don't think that is possible: mutex_trylock should return 0 on
contention. I don't think is possible for mutex_trylock to return 1 when
there are waiters waiting for their chance to run.



 
> > +
> > +		pvcalls_front_free_map(bedata, map);
> > +		kfree(map);
> 
> 
> -boris
> 
>
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 8a90213..5f8e94a 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -209,6 +209,24 @@  static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
 	return IRQ_HANDLED;
 }
 
+static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
+				   struct sock_mapping *map)
+{
+	int i;
+
+	unbind_from_irqhandler(map->active.irq, map);
+
+	spin_lock(&bedata->socket_lock);
+	if (!list_empty(&map->list))
+		list_del_init(&map->list);
+	spin_unlock(&bedata->socket_lock);
+
+	for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
+		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
+	gnttab_end_foreign_access(map->active.ref, 0, 0);
+	free_page((unsigned long)map->active.ring);
+}
+
 int pvcalls_front_socket(struct socket *sock)
 {
 	struct pvcalls_bedata *bedata;
@@ -960,6 +978,92 @@  unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
 	return ret;
 }
 
+int pvcalls_front_release(struct socket *sock)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int req_id, notify, ret;
+	struct xen_pvcalls_request *req;
+
+	pvcalls_enter;
+	if (!pvcalls_front_dev) {
+		pvcalls_exit;
+		return -EIO;
+	}
+	if (sock->sk == NULL) {
+		pvcalls_exit;
+		return 0;
+	}
+
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) sock->sk->sk_send_head;
+	if (map == NULL) {
+		pvcalls_exit;
+		return 0;
+	}
+
+	spin_lock(&bedata->socket_lock);
+	ret = get_request(bedata, &req_id);
+	if (ret < 0) {
+		spin_unlock(&bedata->socket_lock);
+		pvcalls_exit;
+		return ret;
+	}
+	sock->sk->sk_send_head = NULL;
+
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_RELEASE;
+	req->u.release.id = (uint64_t)map;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->socket_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	if (map->active_socket) {
+		/*
+		 * Set in_error and wake up inflight_conn_req to force
+		 * recvmsg waiters to exit.
+		 */
+		map->active.ring->in_error = -EBADF;
+		wake_up_interruptible(&map->active.inflight_conn_req);
+
+		/*
+		 * Wait until there are no more waiters on the mutexes.
+		 * We know that no new waiters can be added because sk_send_head
+		 * is set to NULL -- we only need to wait for the existing
+		 * waiters to return.
+		 */
+		while (!mutex_trylock(&map->active.in_mutex) ||
+			   !mutex_trylock(&map->active.out_mutex))
+			cpu_relax();
+
+		pvcalls_front_free_map(bedata, map);
+		kfree(map);
+	} else {
+		spin_lock(&bedata->socket_lock);
+		if (READ_ONCE(map->passive.inflight_req_id) !=
+		    PVCALLS_INVALID_ID) {
+			pvcalls_front_free_map(bedata,
+					       map->passive.accept_map);
+			kfree(map->passive.accept_map);
+		}
+		list_del_init(&map->list);
+		kfree(map);
+		spin_unlock(&bedata->socket_lock);
+	}
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+
+	pvcalls_exit;
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 25e05b8..3332978 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -23,5 +23,6 @@  int pvcalls_front_recvmsg(struct socket *sock,
 unsigned int pvcalls_front_poll(struct file *file,
 				struct socket *sock,
 				poll_table *wait);
+int pvcalls_front_release(struct socket *sock);
 
 #endif