Message ID | 1505516440-11111-12-git-send-email-sstabellini@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> > +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, > + struct sock_mapping *map) I just noticed: pvcalls_front_free_map() is referenced by patches 2 and 8. > +{ > + int i; > + > + unbind_from_irqhandler(map->active.irq, map); > + > + spin_lock(&bedata->socket_lock); > + if (!list_empty(&map->list)) > + list_del_init(&map->list); > + spin_unlock(&bedata->socket_lock); > + > + for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) > + gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0); > + gnttab_end_foreign_access(map->active.ref, 0, 0); > + free_page((unsigned long)map->active.ring); > +} > + > int pvcalls_front_socket(struct socket *sock) > { > struct pvcalls_bedata *bedata; > @@ -960,6 +978,92 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock, > return ret; > } > > +int pvcalls_front_release(struct socket *sock) > +{ > + struct pvcalls_bedata *bedata; > + struct sock_mapping *map; > + int req_id, notify, ret; > + struct xen_pvcalls_request *req; > + > + pvcalls_enter; > + if (!pvcalls_front_dev) { > + pvcalls_exit; > + return -EIO; > + } > + if (sock->sk == NULL) { > + pvcalls_exit; > + return 0; > + } > + > + bedata = dev_get_drvdata(&pvcalls_front_dev->dev); > + > + map = (struct sock_mapping *) sock->sk->sk_send_head; > + if (map == NULL) { > + pvcalls_exit; > + return 0; > + } > + > + spin_lock(&bedata->socket_lock); > + ret = get_request(bedata, &req_id); > + if (ret < 0) { > + spin_unlock(&bedata->socket_lock); > + pvcalls_exit; > + return ret; > + } > + sock->sk->sk_send_head = NULL; > + > + req = RING_GET_REQUEST(&bedata->ring, req_id); > + req->req_id = req_id; > + req->cmd = PVCALLS_RELEASE; > + req->u.release.id = (uint64_t)map; > + > + bedata->ring.req_prod_pvt++; > + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify); > + spin_unlock(&bedata->socket_lock); > + if (notify) > + notify_remote_via_irq(bedata->irq); > + > + wait_event(bedata->inflight_req, > + READ_ONCE(bedata->rsp[req_id].req_id) == 
req_id); > + > + if (map->active_socket) { > + /* > + * Set in_error and wake up inflight_conn_req to force > + * recvmsg waiters to exit. > + */ > + map->active.ring->in_error = -EBADF; > + wake_up_interruptible(&map->active.inflight_conn_req); > + > + /* > + * Wait until there are no more waiters on the mutexes. > + * We know that no new waiters can be added because sk_send_head > + * is set to NULL -- we only need to wait for the existing > + * waiters to return. > + */ > + while (!mutex_trylock(&map->active.in_mutex) || > + !mutex_trylock(&map->active.out_mutex)) > + cpu_relax(); What if you manage to grab the locks before waiters get to run? for example, in recvmsg: while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) { wait_event_interruptible(map->active.inflight_conn_req, pvcalls_front_read_todo(map)); } ret = __read_ring(map->active.ring, &map->active.data, &msg->msg_iter, len, flags); map will be freed (by pvcalls_front_free_map() below) before __read_ring is passed the just-freed ring. > + > + pvcalls_front_free_map(bedata, map); > + kfree(map); -boris
>> + */ >> + map->active.ring->in_error = -EBADF; >> + wake_up_interruptible(&map->active.inflight_conn_req); >> + >> + /* >> + * Wait until there are no more waiters on the mutexes. >> + * We know that no new waiters can be added because sk_send_head >> + * is set to NULL -- we only need to wait for the existing >> + * waiters to return. >> + */ >> + while (!mutex_trylock(&map->active.in_mutex) || >> + !mutex_trylock(&map->active.out_mutex)) >> + cpu_relax(); > > What if you manage to grab the locks before waiters get to run? for > example, in recvmsg: > > while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) { > wait_event_interruptible(map->active.inflight_conn_req, > pvcalls_front_read_todo(map)); > } > ret = __read_ring(map->active.ring, &map->active.data, > &msg->msg_iter, len, flags); > > map will be freed (by pvcalls_front_free_map() below) before __read_ring > is passed the just-freed ring. Actually, since you don't drop the locks I am not sure recvmsg side will even get there. -boris > > >> + >> + pvcalls_front_free_map(bedata, map); >> + kfree(map); > > -boris > >
On Fri, 22 Sep 2017, Boris Ostrovsky wrote: > > +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, > > + struct sock_mapping *map) > > I just noticed: pvcalls_front_free_map() is referenced by patches 2 and 8. I'll create an empty stub earlier. > > +{ > > + int i; > > + > > + unbind_from_irqhandler(map->active.irq, map); > > + > > + spin_lock(&bedata->socket_lock); > > + if (!list_empty(&map->list)) > > + list_del_init(&map->list); > > + spin_unlock(&bedata->socket_lock); > > + > > + for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) > > + gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0); > > + gnttab_end_foreign_access(map->active.ref, 0, 0); > > + free_page((unsigned long)map->active.ring); > > +} > > + > > int pvcalls_front_socket(struct socket *sock) > > { > > struct pvcalls_bedata *bedata; > > @@ -960,6 +978,92 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock, > > return ret; > > } > > > > +int pvcalls_front_release(struct socket *sock) > > +{ > > + struct pvcalls_bedata *bedata; > > + struct sock_mapping *map; > > + int req_id, notify, ret; > > + struct xen_pvcalls_request *req; > > + > > + pvcalls_enter; > > + if (!pvcalls_front_dev) { > > + pvcalls_exit; > > + return -EIO; > > + } > > + if (sock->sk == NULL) { > > + pvcalls_exit; > > + return 0; > > + } > > + > > + bedata = dev_get_drvdata(&pvcalls_front_dev->dev); > > + > > + map = (struct sock_mapping *) sock->sk->sk_send_head; > > + if (map == NULL) { > > + pvcalls_exit; > > + return 0; > > + } > > + > > + spin_lock(&bedata->socket_lock); > > + ret = get_request(bedata, &req_id); > > + if (ret < 0) { > > + spin_unlock(&bedata->socket_lock); > > + pvcalls_exit; > > + return ret; > > + } > > + sock->sk->sk_send_head = NULL; > > + > > + req = RING_GET_REQUEST(&bedata->ring, req_id); > > + req->req_id = req_id; > > + req->cmd = PVCALLS_RELEASE; > > + req->u.release.id = (uint64_t)map; > > + > > + bedata->ring.req_prod_pvt++; > > + 
RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify); > > + spin_unlock(&bedata->socket_lock); > > + if (notify) > > + notify_remote_via_irq(bedata->irq); > > + > > + wait_event(bedata->inflight_req, > > + READ_ONCE(bedata->rsp[req_id].req_id) == req_id); > > + > > + if (map->active_socket) { > > + /* > > + * Set in_error and wake up inflight_conn_req to force > > + * recvmsg waiters to exit. > > + */ > > + map->active.ring->in_error = -EBADF; > > + wake_up_interruptible(&map->active.inflight_conn_req); > > + > > + /* > > + * Wait until there are no more waiters on the mutexes. > > + * We know that no new waiters can be added because sk_send_head > > + * is set to NULL -- we only need to wait for the existing > > + * waiters to return. > > + */ > > + while (!mutex_trylock(&map->active.in_mutex) || > > + !mutex_trylock(&map->active.out_mutex)) > > + cpu_relax(); > > What if you manage to grab the locks before waiters get to run? for > example, in recvmsg: > > while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) { > wait_event_interruptible(map->active.inflight_conn_req, > pvcalls_front_read_todo(map)); > } > ret = __read_ring(map->active.ring, &map->active.data, > &msg->msg_iter, len, flags); > > map will be freed (by pvcalls_front_free_map() below) before __read_ring > is passed the just-freed ring. I don't think that is possible: mutex_trylock should return 0 on contention. I don't think it is possible for mutex_trylock to return 1 when there are waiters waiting for their chance to run. > > + > > + pvcalls_front_free_map(bedata, map); > > + kfree(map); > > > -boris > >
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 8a90213..5f8e94a 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -209,6 +209,24 @@ static irqreturn_t pvcalls_front_conn_handler(int irq, void *sock_map) return IRQ_HANDLED; } +static void pvcalls_front_free_map(struct pvcalls_bedata *bedata, + struct sock_mapping *map) +{ + int i; + + unbind_from_irqhandler(map->active.irq, map); + + spin_lock(&bedata->socket_lock); + if (!list_empty(&map->list)) + list_del_init(&map->list); + spin_unlock(&bedata->socket_lock); + + for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++) + gnttab_end_foreign_access(map->active.ring->ref[i], 0, 0); + gnttab_end_foreign_access(map->active.ref, 0, 0); + free_page((unsigned long)map->active.ring); +} + int pvcalls_front_socket(struct socket *sock) { struct pvcalls_bedata *bedata; @@ -960,6 +978,92 @@ unsigned int pvcalls_front_poll(struct file *file, struct socket *sock, return ret; } +int pvcalls_front_release(struct socket *sock) +{ + struct pvcalls_bedata *bedata; + struct sock_mapping *map; + int req_id, notify, ret; + struct xen_pvcalls_request *req; + + pvcalls_enter; + if (!pvcalls_front_dev) { + pvcalls_exit; + return -EIO; + } + if (sock->sk == NULL) { + pvcalls_exit; + return 0; + } + + bedata = dev_get_drvdata(&pvcalls_front_dev->dev); + + map = (struct sock_mapping *) sock->sk->sk_send_head; + if (map == NULL) { + pvcalls_exit; + return 0; + } + + spin_lock(&bedata->socket_lock); + ret = get_request(bedata, &req_id); + if (ret < 0) { + spin_unlock(&bedata->socket_lock); + pvcalls_exit; + return ret; + } + sock->sk->sk_send_head = NULL; + + req = RING_GET_REQUEST(&bedata->ring, req_id); + req->req_id = req_id; + req->cmd = PVCALLS_RELEASE; + req->u.release.id = (uint64_t)map; + + bedata->ring.req_prod_pvt++; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify); + spin_unlock(&bedata->socket_lock); + if (notify) + notify_remote_via_irq(bedata->irq); + + 
wait_event(bedata->inflight_req, + READ_ONCE(bedata->rsp[req_id].req_id) == req_id); + + if (map->active_socket) { + /* + * Set in_error and wake up inflight_conn_req to force + * recvmsg waiters to exit. + */ + map->active.ring->in_error = -EBADF; + wake_up_interruptible(&map->active.inflight_conn_req); + + /* + * Wait until there are no more waiters on the mutexes. + * We know that no new waiters can be added because sk_send_head + * is set to NULL -- we only need to wait for the existing + * waiters to return. + */ + while (!mutex_trylock(&map->active.in_mutex) || + !mutex_trylock(&map->active.out_mutex)) + cpu_relax(); + + pvcalls_front_free_map(bedata, map); + kfree(map); + } else { + spin_lock(&bedata->socket_lock); + if (READ_ONCE(map->passive.inflight_req_id) != + PVCALLS_INVALID_ID) { + pvcalls_front_free_map(bedata, + map->passive.accept_map); + kfree(map->passive.accept_map); + } + list_del_init(&map->list); + kfree(map); + spin_unlock(&bedata->socket_lock); + } + WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID); + + pvcalls_exit; + return 0; +} + static const struct xenbus_device_id pvcalls_front_ids[] = { { "pvcalls" }, { "" } diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h index 25e05b8..3332978 100644 --- a/drivers/xen/pvcalls-front.h +++ b/drivers/xen/pvcalls-front.h @@ -23,5 +23,6 @@ int pvcalls_front_recvmsg(struct socket *sock, unsigned int pvcalls_front_poll(struct file *file, struct socket *sock, poll_table *wait); +int pvcalls_front_release(struct socket *sock); #endif
Send PVCALLS_RELEASE to the backend and wait for a reply. Take both in_mutex and out_mutex to avoid concurrent accesses. Then, free the socket. For passive sockets, check whether we have already pre-allocated an active socket for the purpose of being accepted. If so, free that as well. Signed-off-by: Stefano Stabellini <stefano@aporeto.com> CC: boris.ostrovsky@oracle.com CC: jgross@suse.com --- drivers/xen/pvcalls-front.c | 104 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/pvcalls-front.h | 1 + 2 files changed, 105 insertions(+)