diff mbox

[v2,11/13] xen/pvcalls: implement release command

Message ID 1501017730-12797-11-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini July 25, 2017, 9:22 p.m. UTC
Send PVCALLS_RELEASE to the backend and wait for a reply. Take both
in_mutex and out_mutex to avoid concurrent accesses. Then, free the
socket.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 85 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  1 +
 2 files changed, 86 insertions(+)

Comments

Boris Ostrovsky July 27, 2017, 6:39 p.m. UTC | #1
> +int pvcalls_front_release(struct socket *sock)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	int req_id, notify;
> +	struct xen_pvcalls_request *req;
> +
> +	if (!pvcalls_front_dev)
> +		return -EIO;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +	if (!bedata)
> +		return -EIO;

Some (all?) other ops don't check bedata validity. Should they all do?

> +
> +	if (sock->sk == NULL)
> +		return 0;
> +
> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> +	if (map == NULL)
> +		return 0;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
> +
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_RELEASE;
> +	req->u.release.id = (uint64_t)sock;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> +
> +	if (map->active_socket) {
> +		/* 
> +		 * Set in_error and wake up inflight_conn_req to force
> +		 * recvmsg waiters to exit.
> +		 */
> +		map->active.ring->in_error = -EBADF;
> +		wake_up_interruptible(&map->active.inflight_conn_req);
> +
> +		mutex_lock(&map->active.in_mutex);
> +		mutex_lock(&map->active.out_mutex);
> +		pvcalls_front_free_map(bedata, map);
> +		mutex_unlock(&map->active.out_mutex);
> +		mutex_unlock(&map->active.in_mutex);
> +		kfree(map);

Since you are locking here I assume you expect that someone else might
also be trying to lock the map. But you are freeing it immediately after
unlocking. Wouldn't that mean that whoever is trying to grab the lock
might then dereference freed memory?


-boris

> +	} else {
> +		spin_lock(&bedata->pvcallss_lock);
> +		list_del_init(&map->list);
> +		kfree(map);
> +		spin_unlock(&bedata->pvcallss_lock);
> +	}
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +
> +	return 0;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index 25e05b8..3332978 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
>  unsigned int pvcalls_front_poll(struct file *file,
>  				struct socket *sock,
>  				poll_table *wait);
> +int pvcalls_front_release(struct socket *sock);
>  
>  #endif
Stefano Stabellini July 31, 2017, 10:34 p.m. UTC | #2
On Thu, 27 Jul 2017, Boris Ostrovsky wrote:
> > +int pvcalls_front_release(struct socket *sock)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map;
> > +	int req_id, notify;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -EIO;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +	if (!bedata)
> > +		return -EIO;
> 
> Some (all?) other ops don't check bedata validity. Should they all do?

No, I don't think they should: dev_set_drvdata is called in the probe
function (pvcalls_front_probe). I'll remove it.


> > +
> > +	if (sock->sk == NULL)
> > +		return 0;
> > +
> > +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> > +	if (map == NULL)
> > +		return 0;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
> > +
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	req->cmd = PVCALLS_RELEASE;
> > +	req->u.release.id = (uint64_t)sock;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> > +
> > +	if (map->active_socket) {
> > +		/* 
> > +		 * Set in_error and wake up inflight_conn_req to force
> > +		 * recvmsg waiters to exit.
> > +		 */
> > +		map->active.ring->in_error = -EBADF;
> > +		wake_up_interruptible(&map->active.inflight_conn_req);
> > +
> > +		mutex_lock(&map->active.in_mutex);
> > +		mutex_lock(&map->active.out_mutex);
> > +		pvcalls_front_free_map(bedata, map);
> > +		mutex_unlock(&map->active.out_mutex);
> > +		mutex_unlock(&map->active.in_mutex);
> > +		kfree(map);
> 
> Since you are locking here I assume you expect that someone else might
> also be trying to lock the map. But you are freeing it immediately after
> unlocking. Wouldn't that mean that whoever is trying to grab the lock
> might then dereference freed memory?

The lock is to make sure there are no recvmsg or sendmsg in progress. We
are sure that no newer sendmsg or recvmsg are waiting for
pvcalls_front_release to release the lock because, before sending a message
to the backend, we set sk_send_head to NULL.


> > +	} else {
> > +		spin_lock(&bedata->pvcallss_lock);
> > +		list_del_init(&map->list);
> > +		kfree(map);
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +	}
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +
> > +	return 0;
> > +}
> > +
> >  static const struct xenbus_device_id pvcalls_front_ids[] = {
> >  	{ "pvcalls" },
> >  	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index 25e05b8..3332978 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock,
> >  unsigned int pvcalls_front_poll(struct file *file,
> >  				struct socket *sock,
> >  				poll_table *wait);
> > +int pvcalls_front_release(struct socket *sock);
> >  
> >  #endif
Boris Ostrovsky Aug. 1, 2017, 3:23 p.m. UTC | #3
On 07/31/2017 06:34 PM, Stefano Stabellini wrote:
> On Thu, 27 Jul 2017, Boris Ostrovsky wrote:
>>> +int pvcalls_front_release(struct socket *sock)
>>> +{
>>> +	struct pvcalls_bedata *bedata;
>>> +	struct sock_mapping *map;
>>> +	int req_id, notify;
>>> +	struct xen_pvcalls_request *req;
>>> +
>>> +	if (!pvcalls_front_dev)
>>> +		return -EIO;
>>> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
>>> +	if (!bedata)
>>> +		return -EIO;
>> Some (all?) other ops don't check bedata validity. Should they all do?
> No, I don't think they should: dev_set_drvdata is called in the probe
> function (pvcalls_front_probe). I'll remove it.
>
>
>>> +
>>> +	if (sock->sk == NULL)
>>> +		return 0;
>>> +
>>> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
>>> +	if (map == NULL)
>>> +		return 0;
>>> +
>>> +	spin_lock(&bedata->pvcallss_lock);
>>> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
>>> +	if (RING_FULL(&bedata->ring) ||
>>> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
>>> +		spin_unlock(&bedata->pvcallss_lock);
>>> +		return -EAGAIN;
>>> +	}
>>> +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
>>> +
>>> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
>>> +	req->req_id = req_id;
>>> +	req->cmd = PVCALLS_RELEASE;
>>> +	req->u.release.id = (uint64_t)sock;
>>> +
>>> +	bedata->ring.req_prod_pvt++;
>>> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
>>> +	spin_unlock(&bedata->pvcallss_lock);
>>> +	if (notify)
>>> +		notify_remote_via_irq(bedata->irq);
>>> +
>>> +	wait_event(bedata->inflight_req,
>>> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
>>> +
>>> +	if (map->active_socket) {
>>> +		/* 
>>> +		 * Set in_error and wake up inflight_conn_req to force
>>> +		 * recvmsg waiters to exit.
>>> +		 */
>>> +		map->active.ring->in_error = -EBADF;
>>> +		wake_up_interruptible(&map->active.inflight_conn_req);
>>> +
>>> +		mutex_lock(&map->active.in_mutex);
>>> +		mutex_lock(&map->active.out_mutex);
>>> +		pvcalls_front_free_map(bedata, map);
>>> +		mutex_unlock(&map->active.out_mutex);
>>> +		mutex_unlock(&map->active.in_mutex);
>>> +		kfree(map);
>> Since you are locking here I assume you expect that someone else might
>> also be trying to lock the map. But you are freeing it immediately after
>> unlocking. Wouldn't that mean that whoever is trying to grab the lock
>> might then dereference freed memory?
> The lock is to make sure there are no recvmsg or sendmsg in progress. We
> are sure that no newer sendmsg or recvmsg are waiting for
> pvcalls_front_release to release the lock because before send a message
> to the backend we set sk_send_head to NULL.

Is there a chance that whoever is potentially calling send/recvmsg has
checked that sk_send_head is non-NULL but hasn't grabbed the lock yet?

Freeing a structure containing a lock right after releasing the lock
looks weird (to me). Is there any other way to synchronize with
sender/receiver? Any other lock?

BTW, I also noticed that in recvmsg you are calling
wait_event_interruptible() while holding the lock. Have you tested with
CONFIG_DEBUG_ATOMIC_SLEEP? (or maybe it's some other config option that
would complain about those sorts of things)

-boris
Jürgen Groß Aug. 1, 2017, 3:34 p.m. UTC | #4
On 01/08/17 17:23, Boris Ostrovsky wrote:
> On 07/31/2017 06:34 PM, Stefano Stabellini wrote:
>> On Thu, 27 Jul 2017, Boris Ostrovsky wrote:
>>>> +int pvcalls_front_release(struct socket *sock)
>>>> +{
>>>> +	struct pvcalls_bedata *bedata;
>>>> +	struct sock_mapping *map;
>>>> +	int req_id, notify;
>>>> +	struct xen_pvcalls_request *req;
>>>> +
>>>> +	if (!pvcalls_front_dev)
>>>> +		return -EIO;
>>>> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
>>>> +	if (!bedata)
>>>> +		return -EIO;
>>> Some (all?) other ops don't check bedata validity. Should they all do?
>> No, I don't think they should: dev_set_drvdata is called in the probe
>> function (pvcalls_front_probe). I'll remove it.
>>
>>
>>>> +
>>>> +	if (sock->sk == NULL)
>>>> +		return 0;
>>>> +
>>>> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
>>>> +	if (map == NULL)
>>>> +		return 0;
>>>> +
>>>> +	spin_lock(&bedata->pvcallss_lock);
>>>> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
>>>> +	if (RING_FULL(&bedata->ring) ||
>>>> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
>>>> +		spin_unlock(&bedata->pvcallss_lock);
>>>> +		return -EAGAIN;
>>>> +	}
>>>> +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
>>>> +
>>>> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
>>>> +	req->req_id = req_id;
>>>> +	req->cmd = PVCALLS_RELEASE;
>>>> +	req->u.release.id = (uint64_t)sock;
>>>> +
>>>> +	bedata->ring.req_prod_pvt++;
>>>> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
>>>> +	spin_unlock(&bedata->pvcallss_lock);
>>>> +	if (notify)
>>>> +		notify_remote_via_irq(bedata->irq);
>>>> +
>>>> +	wait_event(bedata->inflight_req,
>>>> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
>>>> +
>>>> +	if (map->active_socket) {
>>>> +		/* 
>>>> +		 * Set in_error and wake up inflight_conn_req to force
>>>> +		 * recvmsg waiters to exit.
>>>> +		 */
>>>> +		map->active.ring->in_error = -EBADF;
>>>> +		wake_up_interruptible(&map->active.inflight_conn_req);
>>>> +
>>>> +		mutex_lock(&map->active.in_mutex);
>>>> +		mutex_lock(&map->active.out_mutex);
>>>> +		pvcalls_front_free_map(bedata, map);
>>>> +		mutex_unlock(&map->active.out_mutex);
>>>> +		mutex_unlock(&map->active.in_mutex);
>>>> +		kfree(map);
>>> Since you are locking here I assume you expect that someone else might
>>> also be trying to lock the map. But you are freeing it immediately after
>>> unlocking. Wouldn't that mean that whoever is trying to grab the lock
>>> might then dereference freed memory?
>> The lock is to make sure there are no recvmsg or sendmsg in progress. We
>> are sure that no newer sendmsg or recvmsg are waiting for
>> pvcalls_front_release to release the lock because before send a message
>> to the backend we set sk_send_head to NULL.
> 
> Is there a chance that whoever is potentially calling send/rcvmsg has
> checked that sk_send_head is non-NULL but hasn't grabbed the lock yet?
> 
> Freeing a structure containing a lock right after releasing the lock
> looks weird (to me). Is there any other way to synchronize with
> sender/receiver? Any other lock?

Right. This looks fishy. Either you don't need the locks or you can't
just free the area right after releasing the lock.

> BTW, I also noticed that in rcvmsg you are calling
> wait_event_interruptible() while holding the lock. Have you tested with
> CONFIG_DEBUG_ATOMIC_SLEEP? (or maybe it's some other config  option that
> would complain about those sorts of thing)

I believe sleeping while holding a mutex is allowed. Sleeping in
spinlocked paths is bad.

BTW: You are looking for CONFIG_DEBUG_MUTEXES (see
Documentation/locking/mutex-design.txt ).


Juergen
Boris Ostrovsky Aug. 1, 2017, 3:57 p.m. UTC | #5
>> BTW, I also noticed that in rcvmsg you are calling
>> wait_event_interruptible() while holding the lock. Have you tested with
>> CONFIG_DEBUG_ATOMIC_SLEEP? (or maybe it's some other config  option that
>> would complain about those sorts of thing)
> I believe sleeping while holding a mutex is allowed. Sleeping in
> spinlocked paths is bad.

Oh, right. I was thinking about spinlocks. Sorry.

-boris

>
> BTW: You are looking for CONFIG_DEBUG_MUTEXES (see
> Documentation/locking/mutex-design.txt ).
>
>
> Juergen
Stefano Stabellini Sept. 11, 2017, 11:56 p.m. UTC | #6
On Tue, 1 Aug 2017, Juergen Gross wrote:
> >>>> +	if (sock->sk == NULL)
> >>>> +		return 0;
> >>>> +
> >>>> +	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
> >>>> +	if (map == NULL)
> >>>> +		return 0;
> >>>> +
> >>>> +	spin_lock(&bedata->pvcallss_lock);
> >>>> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> >>>> +	if (RING_FULL(&bedata->ring) ||
> >>>> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> >>>> +		spin_unlock(&bedata->pvcallss_lock);
> >>>> +		return -EAGAIN;
> >>>> +	}
> >>>> +	WRITE_ONCE(sock->sk->sk_send_head, NULL);
> >>>> +
> >>>> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> >>>> +	req->req_id = req_id;
> >>>> +	req->cmd = PVCALLS_RELEASE;
> >>>> +	req->u.release.id = (uint64_t)sock;
> >>>> +
> >>>> +	bedata->ring.req_prod_pvt++;
> >>>> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> >>>> +	spin_unlock(&bedata->pvcallss_lock);
> >>>> +	if (notify)
> >>>> +		notify_remote_via_irq(bedata->irq);
> >>>> +
> >>>> +	wait_event(bedata->inflight_req,
> >>>> +		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> >>>> +
> >>>> +	if (map->active_socket) {
> >>>> +		/* 
> >>>> +		 * Set in_error and wake up inflight_conn_req to force
> >>>> +		 * recvmsg waiters to exit.
> >>>> +		 */
> >>>> +		map->active.ring->in_error = -EBADF;
> >>>> +		wake_up_interruptible(&map->active.inflight_conn_req);
> >>>> +
> >>>> +		mutex_lock(&map->active.in_mutex);
> >>>> +		mutex_lock(&map->active.out_mutex);
> >>>> +		pvcalls_front_free_map(bedata, map);
> >>>> +		mutex_unlock(&map->active.out_mutex);
> >>>> +		mutex_unlock(&map->active.in_mutex);
> >>>> +		kfree(map);
> >>> Since you are locking here I assume you expect that someone else might
> >>> also be trying to lock the map. But you are freeing it immediately after
> >>> unlocking. Wouldn't that mean that whoever is trying to grab the lock
> >>> might then dereference freed memory?
> >> The lock is to make sure there are no recvmsg or sendmsg in progress. We
> >> are sure that no newer sendmsg or recvmsg are waiting for
> >> pvcalls_front_release to release the lock because before send a message
> >> to the backend we set sk_send_head to NULL.
> > 
> > Is there a chance that whoever is potentially calling send/rcvmsg has
> > checked that sk_send_head is non-NULL but hasn't grabbed the lock yet?
> > 
> > Freeing a structure containing a lock right after releasing the lock
> > looks weird (to me). Is there any other way to synchronize with
> > sender/receiver? Any other lock?
> 
> Right. This looks fishy. Either you don't need the locks or you can't
> just free the area right after releasing the lock.

I changed this code; you'll see it soon in the new patch series I am going
to send. There were two very similar mutex_unlock/kfree problems:

1) pvcalls_front_release
2) pvcalls_front_remove

For 2), I introduced a refcount. I only free the data structs when the
refcount reaches 0.

For 1), I could have introduced a similar refcount serving the same
purpose, but instead I used mutex_trylock, effectively relying on the
internal counts in in_mutex and out_mutex to achieve the same result.
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 833b717..5a4040e 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -184,6 +184,23 @@  static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map)
 	return IRQ_HANDLED;
 }
 
+static void pvcalls_front_free_map(struct pvcalls_bedata *bedata,
+				   struct sock_mapping *map)
+{
+	int i;
+
+	spin_lock(&bedata->pvcallss_lock);
+	if (!list_empty(&map->list))
+		list_del_init(&map->list);
+	spin_unlock(&bedata->pvcallss_lock);
+
+	for (i = 0; i < (1 << map->active.ring->ring_order); i++)
+		gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0);
+	gnttab_end_foreign_access(map->active.ref, 0, 0);
+	free_page((unsigned long)map->active.ring);
+	unbind_from_irqhandler(map->active.irq, map);
+}
+
 int pvcalls_front_socket(struct socket *sock)
 {
 	struct pvcalls_bedata *bedata;
@@ -819,6 +836,74 @@  unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
 		return pvcalls_front_poll_passive(file, bedata, map, wait);
 }
 
+int pvcalls_front_release(struct socket *sock)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int req_id, notify;
+	struct xen_pvcalls_request *req;
+
+	if (!pvcalls_front_dev)
+		return -EIO;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+	if (!bedata)
+		return -EIO;
+
+	if (sock->sk == NULL)
+		return 0;
+
+	map = (struct sock_mapping *) READ_ONCE(sock->sk->sk_send_head);
+	if (map == NULL)
+		return 0;
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	WRITE_ONCE(sock->sk->sk_send_head, NULL);
+
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_RELEASE;
+	req->u.release.id = (uint64_t)sock;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	if (map->active_socket) {
+		/* 
+		 * Set in_error and wake up inflight_conn_req to force
+		 * recvmsg waiters to exit.
+		 */
+		map->active.ring->in_error = -EBADF;
+		wake_up_interruptible(&map->active.inflight_conn_req);
+
+		mutex_lock(&map->active.in_mutex);
+		mutex_lock(&map->active.out_mutex);
+		pvcalls_front_free_map(bedata, map);
+		mutex_unlock(&map->active.out_mutex);
+		mutex_unlock(&map->active.in_mutex);
+		kfree(map);
+	} else {
+		spin_lock(&bedata->pvcallss_lock);
+		list_del_init(&map->list);
+		kfree(map);
+		spin_unlock(&bedata->pvcallss_lock);
+	}
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 25e05b8..3332978 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -23,5 +23,6 @@  int pvcalls_front_recvmsg(struct socket *sock,
 unsigned int pvcalls_front_poll(struct file *file,
 				struct socket *sock,
 				poll_table *wait);
+int pvcalls_front_release(struct socket *sock);
 
 #endif