diff mbox

[v4,11/13] xen/pvcalls: implement poll command

Message ID 1505516440-11111-11-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini Sept. 15, 2017, 11 p.m. UTC
For active sockets, check the indexes and use the inflight_conn_req
waitqueue to wait.

For passive sockets if an accept is outstanding
(PVCALLS_FLAG_ACCEPT_INFLIGHT), check if it has been answered by looking
at bedata->rsp[req_id]. If so, return POLLIN.  Otherwise use the
inflight_accept_req waitqueue.

If no accepts are inflight, send PVCALLS_POLL to the backend. If we have
outstanding POLL requests awaiting a response, use the inflight_req
waitqueue: inflight_req is woken up when a new response is received; on
wakeup we check whether the POLL response has arrived by looking at the
PVCALLS_FLAG_POLL_RET flag. We set the flag from
pvcalls_front_event_handler, if the response was for a POLL command.

In pvcalls_front_event_handler, get the struct sock_mapping from the
poll id (we previously converted struct sock_mapping* to uint64_t and
used it as id).

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 144 +++++++++++++++++++++++++++++++++++++++++---
 drivers/xen/pvcalls-front.h |   3 +
 2 files changed, 138 insertions(+), 9 deletions(-)

Comments

Andrea Parri Sept. 19, 2017, 3:19 p.m. UTC | #1
Hi Stefano,

On Fri, Sep 15, 2017 at 04:00:38PM -0700, Stefano Stabellini wrote:
> For active sockets, check the indexes and use the inflight_conn_req
> waitqueue to wait.
> 
> For passive sockets if an accept is outstanding
> (PVCALLS_FLAG_ACCEPT_INFLIGHT), check if it has been answered by looking
> at bedata->rsp[req_id]. If so, return POLLIN.  Otherwise use the
> inflight_accept_req waitqueue.
> 
> If no accepts are inflight, send PVCALLS_POLL to the backend. If we have
> outstanding POLL requests awaiting for a response use the inflight_req
> waitqueue: inflight_req is awaken when a new response is received; on
> wakeup we check whether the POLL response is arrived by looking at the
> PVCALLS_FLAG_POLL_RET flag. We set the flag from
> pvcalls_front_event_handler, if the response was for a POLL command.
> 
> In pvcalls_front_event_handler, get the struct sock_mapping from the
> poll id (we previously converted struct sock_mapping* to uint64_t and
> used it as id).
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-front.c | 144 +++++++++++++++++++++++++++++++++++++++++---
>  drivers/xen/pvcalls-front.h |   3 +
>  2 files changed, 138 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index 01a5a69..8a90213 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -85,6 +85,8 @@ struct sock_mapping {
>  		 * Only one poll operation can be inflight for a given socket.
>  		 */
>  #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> +#define PVCALLS_FLAG_POLL_RET        2
>  			uint8_t flags;
>  			uint32_t inflight_req_id;
>  			struct sock_mapping *accept_map;
> @@ -155,15 +157,32 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
>  		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
>  
>  		req_id = rsp->req_id;
> -		dst = (uint8_t *)&bedata->rsp[req_id] + sizeof(rsp->req_id);
> -		src = (uint8_t *)rsp + sizeof(rsp->req_id);
> -		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> -		/*
> -		 * First copy the rest of the data, then req_id. It is
> -		 * paired with the barrier when accessing bedata->rsp.
> -		 */
> -		smp_wmb();
> -		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> +		if (rsp->cmd == PVCALLS_POLL) {
> +			struct sock_mapping *map = (struct sock_mapping *)
> +						   rsp->u.poll.id;
> +
> +			set_bit(PVCALLS_FLAG_POLL_RET,
> +				(void *)&map->passive.flags);
> +			/*
> +			 * Set RET, then clear INFLIGHT. It pairs with
> +			 * the checks at the beginning of
> +			 * pvcalls_front_poll_passive.
> +			 */
> +			smp_wmb();

pvcalls_front_poll_passive() seems to first check RET, then INFLIGHT
(no "crossing of mem. locations"): can you elaborate here?


> +			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +				  (void *)&map->passive.flags);
> +		} else {
> +			dst = (uint8_t *)&bedata->rsp[req_id] +
> +			      sizeof(rsp->req_id);
> +			src = (uint8_t *)rsp + sizeof(rsp->req_id);
> +			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> +			/*
> +			 * First copy the rest of the data, then req_id. It is
> +			 * paired with the barrier when accessing bedata->rsp.
> +			 */
> +			smp_wmb();

Would you point me to the "pairing barrier"? (not sure I understand
the logic here...)


> +			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);

Could this be rewritten as

   WRITE_ONCE(bedata->rsp[req_id].req_id, req_id);


> +		}
>  
>  		done = 1;
>  		bedata->ring.rsp_cons++;
> @@ -834,6 +853,113 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
>  	return ret;
>  }
>  
> +static unsigned int pvcalls_front_poll_passive(struct file *file,
> +					       struct pvcalls_bedata *bedata,
> +					       struct sock_mapping *map,
> +					       poll_table *wait)
> +{
> +	int notify, req_id, ret;
> +	struct xen_pvcalls_request *req;
> +
> +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +		     (void *)&map->passive.flags)) {
> +		uint32_t req_id = READ_ONCE(map->passive.inflight_req_id);
> +
> +		if (req_id != PVCALLS_INVALID_ID &&
> +		    READ_ONCE(bedata->rsp[req_id].req_id) == req_id)
> +			return POLLIN | POLLRDNORM;
> +
> +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> +		return 0;
> +	}
> +
> +	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
> +			       (void *)&map->passive.flags))
> +		return POLLIN | POLLRDNORM;
> +
> +	/*
> +	 * First check RET, then INFLIGHT. No barriers necessary to
> +	 * ensure execution ordering because of the conditional
> +	 * instructions creating control dependencies.
> +	 */

In any case, these test_and_{clear,set}_bit() are "fully ordered".

  Andrea


> +
> +	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> +			     (void *)&map->passive.flags)) {
> +		poll_wait(file, &bedata->inflight_req, wait);
> +		return 0;
> +	}
> +
> +	spin_lock(&bedata->socket_lock);
> +	ret = get_request(bedata, &req_id);
> +	if (ret < 0) {
> +		spin_unlock(&bedata->socket_lock);
> +		return ret;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	req->cmd = PVCALLS_POLL;
> +	req->u.poll.id = (uint64_t) map;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->socket_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	poll_wait(file, &bedata->inflight_req, wait);
> +	return 0;
> +}
> +
> +static unsigned int pvcalls_front_poll_active(struct file *file,
> +					      struct pvcalls_bedata *bedata,
> +					      struct sock_mapping *map,
> +					      poll_table *wait)
> +{
> +	unsigned int mask = 0;
> +	int32_t in_error, out_error;
> +	struct pvcalls_data_intf *intf = map->active.ring;
> +
> +	out_error = intf->out_error;
> +	in_error = intf->in_error;
> +
> +	poll_wait(file, &map->active.inflight_conn_req, wait);
> +	if (pvcalls_front_write_todo(map))
> +		mask |= POLLOUT | POLLWRNORM;
> +	if (pvcalls_front_read_todo(map))
> +		mask |= POLLIN | POLLRDNORM;
> +	if (in_error != 0 || out_error != 0)
> +		mask |= POLLERR;
> +
> +	return mask;
> +}
> +
> +unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
> +			       poll_table *wait)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map;
> +	int ret;
> +
> +	pvcalls_enter;
> +	if (!pvcalls_front_dev) {
> +		pvcalls_exit;
> +		return POLLNVAL;
> +	}
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) sock->sk->sk_send_head;
> +	if (!map) {
> +		pvcalls_exit;
> +		return POLLNVAL;
> +	}
> +	if (map->active_socket)
> +		ret = pvcalls_front_poll_active(file, bedata, map, wait);
> +	else
> +		ret = pvcalls_front_poll_passive(file, bedata, map, wait);
> +	pvcalls_exit;
> +	return ret;
> +}
> +
>  static const struct xenbus_device_id pvcalls_front_ids[] = {
>  	{ "pvcalls" },
>  	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index de24041..25e05b8 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -20,5 +20,8 @@ int pvcalls_front_recvmsg(struct socket *sock,
>  			  struct msghdr *msg,
>  			  size_t len,
>  			  int flags);
> +unsigned int pvcalls_front_poll(struct file *file,
> +				struct socket *sock,
> +				poll_table *wait);
>  
>  #endif
> -- 
> 1.9.1
>
Boris Ostrovsky Sept. 22, 2017, 10:27 p.m. UTC | #2
>  
> +static unsigned int pvcalls_front_poll_passive(struct file *file,
> +					       struct pvcalls_bedata *bedata,
> +					       struct sock_mapping *map,
> +					       poll_table *wait)
> +{
> +	int notify, req_id, ret;
> +	struct xen_pvcalls_request *req;
> +
> +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> +		     (void *)&map->passive.flags)) {
> +		uint32_t req_id = READ_ONCE(map->passive.inflight_req_id);
> +
> +		if (req_id != PVCALLS_INVALID_ID &&
> +		    READ_ONCE(bedata->rsp[req_id].req_id) == req_id)
> +			return POLLIN | POLLRDNORM;


Do we need to clear PVCALLS_FLAG_ACCEPT_INFLIGHT? Or do we expect a
(subsequent?) accept() to do that?

-boris



> +
> +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> +		return 0;
> +	}
> +
Stefano Stabellini Oct. 6, 2017, 11:33 p.m. UTC | #3
On Tue, 19 Sep 2017, Andrea Parri wrote:
> Hi Stefano,
> 
> On Fri, Sep 15, 2017 at 04:00:38PM -0700, Stefano Stabellini wrote:
> > For active sockets, check the indexes and use the inflight_conn_req
> > waitqueue to wait.
> > 
> > For passive sockets if an accept is outstanding
> > (PVCALLS_FLAG_ACCEPT_INFLIGHT), check if it has been answered by looking
> > at bedata->rsp[req_id]. If so, return POLLIN.  Otherwise use the
> > inflight_accept_req waitqueue.
> > 
> > If no accepts are inflight, send PVCALLS_POLL to the backend. If we have
> > outstanding POLL requests awaiting for a response use the inflight_req
> > waitqueue: inflight_req is awaken when a new response is received; on
> > wakeup we check whether the POLL response is arrived by looking at the
> > PVCALLS_FLAG_POLL_RET flag. We set the flag from
> > pvcalls_front_event_handler, if the response was for a POLL command.
> > 
> > In pvcalls_front_event_handler, get the struct sock_mapping from the
> > poll id (we previously converted struct sock_mapping* to uint64_t and
> > used it as id).
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-front.c | 144 +++++++++++++++++++++++++++++++++++++++++---
> >  drivers/xen/pvcalls-front.h |   3 +
> >  2 files changed, 138 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index 01a5a69..8a90213 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -85,6 +85,8 @@ struct sock_mapping {
> >  		 * Only one poll operation can be inflight for a given socket.
> >  		 */
> >  #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> > +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> > +#define PVCALLS_FLAG_POLL_RET        2
> >  			uint8_t flags;
> >  			uint32_t inflight_req_id;
> >  			struct sock_mapping *accept_map;
> > @@ -155,15 +157,32 @@ static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
> >  		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
> >  
> >  		req_id = rsp->req_id;
> > -		dst = (uint8_t *)&bedata->rsp[req_id] + sizeof(rsp->req_id);
> > -		src = (uint8_t *)rsp + sizeof(rsp->req_id);
> > -		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > -		/*
> > -		 * First copy the rest of the data, then req_id. It is
> > -		 * paired with the barrier when accessing bedata->rsp.
> > -		 */
> > -		smp_wmb();
> > -		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> > +		if (rsp->cmd == PVCALLS_POLL) {
> > +			struct sock_mapping *map = (struct sock_mapping *)
> > +						   rsp->u.poll.id;
> > +
> > +			set_bit(PVCALLS_FLAG_POLL_RET,
> > +				(void *)&map->passive.flags);
> > +			/*
> > +			 * Set RET, then clear INFLIGHT. It pairs with
> > +			 * the checks at the beginning of
> > +			 * pvcalls_front_poll_passive.
> > +			 */
> > +			smp_wmb();
> 
> pvcalls_front_poll_passive() seems to first check RET, then INFLIGHT
> (no "crossing of mem. locations"): can you elaborate here?

Thank you for the review! It looks like you spotted two errors. I think
here it should be

  clear INFLIGHT
  smp_wmb()
  set RET

to pair properly.


> > +			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
> > +				  (void *)&map->passive.flags);
> > +		} else {
> > +			dst = (uint8_t *)&bedata->rsp[req_id] +
> > +			      sizeof(rsp->req_id);
> > +			src = (uint8_t *)rsp + sizeof(rsp->req_id);
> > +			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
> > +			/*
> > +			 * First copy the rest of the data, then req_id. It is
> > +			 * paired with the barrier when accessing bedata->rsp.
> > +			 */
> > +			smp_wmb();
> 
> Would you point me to the "pairing barrier"? (not sure I understand
> the logic here...)

On this side (pvcalls_front_event_handler):
    write content
    smp_wmb
    write id

On the other side (all command functions, for example pvcalls_front_socket)
    read id (passed to wait_event)
    smp_rmb() <--- this is missing and should pair with the above smp_wmb
    read content
    smp_mb() (*)
    clear id
 
(*) this is unnecessary and has no pairing, it was supposed to protect
against the content being overwritten before the read is complete,
because after clearing the id the slot could be reused. get_request
returns an id, but the content is not overwritten immediately. The id is
passed to the backend, and only upon receiving an answer the content
gets overwritten. I think it is unnecessary and I could remove it or
turn it into a compiler barrier.


> > +			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
> 
> Could this be rewritten as
> 
>    WRITE_ONCE(bedata->rsp[req_id].req_id, req_id);
 
Yes, I'll change it

 
> > +		}
> >  
> >  		done = 1;
> >  		bedata->ring.rsp_cons++;
Stefano Stabellini Oct. 6, 2017, 11:39 p.m. UTC | #4
On Fri, 22 Sep 2017, Boris Ostrovsky wrote:
> > +static unsigned int pvcalls_front_poll_passive(struct file *file,
> > +					       struct pvcalls_bedata *bedata,
> > +					       struct sock_mapping *map,
> > +					       poll_table *wait)
> > +{
> > +	int notify, req_id, ret;
> > +	struct xen_pvcalls_request *req;
> > +
> > +	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
> > +		     (void *)&map->passive.flags)) {
> > +		uint32_t req_id = READ_ONCE(map->passive.inflight_req_id);
> > +
> > +		if (req_id != PVCALLS_INVALID_ID &&
> > +		    READ_ONCE(bedata->rsp[req_id].req_id) == req_id)
> > +			return POLLIN | POLLRDNORM;
> 
> 
> Do we need to clear PVCALLS_FLAG_ACCEPT_INFLIGHT? Or do we expect a
> (subsequent?) accept() to do that?

No need to clear, here we only need to say whether there is data to read
or not. Subsequent accept calls will clear PVCALLS_FLAG_ACCEPT_INFLIGHT.

 
> > +
> > +		poll_wait(file, &map->passive.inflight_accept_req, wait);
> > +		return 0;
> > +	}
> > +
>
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 01a5a69..8a90213 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -85,6 +85,8 @@  struct sock_mapping {
 		 * Only one poll operation can be inflight for a given socket.
 		 */
 #define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
+#define PVCALLS_FLAG_POLL_INFLIGHT   1
+#define PVCALLS_FLAG_POLL_RET        2
 			uint8_t flags;
 			uint32_t inflight_req_id;
 			struct sock_mapping *accept_map;
@@ -155,15 +157,32 @@  static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 		rsp = RING_GET_RESPONSE(&bedata->ring, bedata->ring.rsp_cons);
 
 		req_id = rsp->req_id;
-		dst = (uint8_t *)&bedata->rsp[req_id] + sizeof(rsp->req_id);
-		src = (uint8_t *)rsp + sizeof(rsp->req_id);
-		memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
-		/*
-		 * First copy the rest of the data, then req_id. It is
-		 * paired with the barrier when accessing bedata->rsp.
-		 */
-		smp_wmb();
-		WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+		if (rsp->cmd == PVCALLS_POLL) {
+			struct sock_mapping *map = (struct sock_mapping *)
+						   rsp->u.poll.id;
+
+			set_bit(PVCALLS_FLAG_POLL_RET,
+				(void *)&map->passive.flags);
+			/*
+			 * Set RET, then clear INFLIGHT. It pairs with
+			 * the checks at the beginning of
+			 * pvcalls_front_poll_passive.
+			 */
+			smp_wmb();
+			clear_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+				  (void *)&map->passive.flags);
+		} else {
+			dst = (uint8_t *)&bedata->rsp[req_id] +
+			      sizeof(rsp->req_id);
+			src = (uint8_t *)rsp + sizeof(rsp->req_id);
+			memcpy(dst, src, sizeof(*rsp) - sizeof(rsp->req_id));
+			/*
+			 * First copy the rest of the data, then req_id. It is
+			 * paired with the barrier when accessing bedata->rsp.
+			 */
+			smp_wmb();
+			WRITE_ONCE(bedata->rsp[req_id].req_id, rsp->req_id);
+		}
 
 		done = 1;
 		bedata->ring.rsp_cons++;
@@ -834,6 +853,113 @@  int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 	return ret;
 }
 
+static unsigned int pvcalls_front_poll_passive(struct file *file,
+					       struct pvcalls_bedata *bedata,
+					       struct sock_mapping *map,
+					       poll_table *wait)
+{
+	int notify, req_id, ret;
+	struct xen_pvcalls_request *req;
+
+	if (test_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+		     (void *)&map->passive.flags)) {
+		uint32_t req_id = READ_ONCE(map->passive.inflight_req_id);
+
+		if (req_id != PVCALLS_INVALID_ID &&
+		    READ_ONCE(bedata->rsp[req_id].req_id) == req_id)
+			return POLLIN | POLLRDNORM;
+
+		poll_wait(file, &map->passive.inflight_accept_req, wait);
+		return 0;
+	}
+
+	if (test_and_clear_bit(PVCALLS_FLAG_POLL_RET,
+			       (void *)&map->passive.flags))
+		return POLLIN | POLLRDNORM;
+
+	/*
+	 * First check RET, then INFLIGHT. No barriers necessary to
+	 * ensure execution ordering because of the conditional
+	 * instructions creating control dependencies.
+	 */
+
+	if (test_and_set_bit(PVCALLS_FLAG_POLL_INFLIGHT,
+			     (void *)&map->passive.flags)) {
+		poll_wait(file, &bedata->inflight_req, wait);
+		return 0;
+	}
+
+	spin_lock(&bedata->socket_lock);
+	ret = get_request(bedata, &req_id);
+	if (ret < 0) {
+		spin_unlock(&bedata->socket_lock);
+		return ret;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	req->cmd = PVCALLS_POLL;
+	req->u.poll.id = (uint64_t) map;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->socket_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	poll_wait(file, &bedata->inflight_req, wait);
+	return 0;
+}
+
+static unsigned int pvcalls_front_poll_active(struct file *file,
+					      struct pvcalls_bedata *bedata,
+					      struct sock_mapping *map,
+					      poll_table *wait)
+{
+	unsigned int mask = 0;
+	int32_t in_error, out_error;
+	struct pvcalls_data_intf *intf = map->active.ring;
+
+	out_error = intf->out_error;
+	in_error = intf->in_error;
+
+	poll_wait(file, &map->active.inflight_conn_req, wait);
+	if (pvcalls_front_write_todo(map))
+		mask |= POLLOUT | POLLWRNORM;
+	if (pvcalls_front_read_todo(map))
+		mask |= POLLIN | POLLRDNORM;
+	if (in_error != 0 || out_error != 0)
+		mask |= POLLERR;
+
+	return mask;
+}
+
+unsigned int pvcalls_front_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map;
+	int ret;
+
+	pvcalls_enter;
+	if (!pvcalls_front_dev) {
+		pvcalls_exit;
+		return POLLNVAL;
+	}
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) sock->sk->sk_send_head;
+	if (!map) {
+		pvcalls_exit;
+		return POLLNVAL;
+	}
+	if (map->active_socket)
+		ret = pvcalls_front_poll_active(file, bedata, map, wait);
+	else
+		ret = pvcalls_front_poll_passive(file, bedata, map, wait);
+	pvcalls_exit;
+	return ret;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index de24041..25e05b8 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -20,5 +20,8 @@  int pvcalls_front_recvmsg(struct socket *sock,
 			  struct msghdr *msg,
 			  size_t len,
 			  int flags);
+unsigned int pvcalls_front_poll(struct file *file,
+				struct socket *sock,
+				poll_table *wait);
 
 #endif