diff mbox

[v5,10/13] xen/pvcalls: implement recvmsg

Message ID 1507336227-20477-10-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini Oct. 7, 2017, 12:30 a.m. UTC
Implement recvmsg by copying data from the "in" ring. If not enough data
is available and the recvmsg call is blocking, then wait on the
inflight_conn_req waitqueue. Take the active socket in_mutex so that
only one function can access the ring at any given time.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 108 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |   4 ++
 2 files changed, 112 insertions(+)

Comments

Boris Ostrovsky Oct. 17, 2017, 9:35 p.m. UTC | #1
> +
> +int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> +		     int flags)
> +{
> +	struct pvcalls_bedata *bedata;
> +	int ret;
> +	struct sock_mapping *map;
> +
> +	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
> +		return -EOPNOTSUPP;
> +
> +	pvcalls_enter();
> +	if (!pvcalls_front_dev) {
> +		pvcalls_exit();
> +		return -ENOTCONN;
> +	}
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = (struct sock_mapping *) sock->sk->sk_send_head;
> +	if (!map) {
> +		pvcalls_exit();
> +		return -ENOTSOCK;
> +	}
> +
> +	mutex_lock(&map->active.in_mutex);
> +	if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
> +		len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
> +
> +	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> +		wait_event_interruptible(map->active.inflight_conn_req,
> +					 pvcalls_front_read_todo(map));
> +	}
> +	ret = __read_ring(map->active.ring, &map->active.data,
> +			  &msg->msg_iter, len, flags);
> +
> +	if (ret > 0)
> +		notify_remote_via_irq(map->active.irq);
> +	if (ret == 0)
> +		ret = -EAGAIN;

Why not 0? The manpage says:

       EAGAIN or EWOULDBLOCK
              The  socket  is  marked nonblocking and the receive
operation would block, or a receive timeout
              had been set and the timeout expired before data was
received.  POSIX.1 allows either error  to
              be  returned  for  this case, and does not require these
constants to have the same value, so a
              portable application should check for both possibilities.


I don't think either of these conditions is true here.

(Again, should have noticed this earlier, sorry)

-boris


> +	if (ret == -ENOTCONN)
> +		ret = 0;
> +
> +	mutex_unlock(&map->active.in_mutex);
> +	pvcalls_exit();
> +	return ret;
> +}
Stefano Stabellini Oct. 20, 2017, 1:38 a.m. UTC | #2
On Tue, 17 Oct 2017, Boris Ostrovsky wrote:
> > +
> > +int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
> > +		     int flags)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	int ret;
> > +	struct sock_mapping *map;
> > +
> > +	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
> > +		return -EOPNOTSUPP;
> > +
> > +	pvcalls_enter();
> > +	if (!pvcalls_front_dev) {
> > +		pvcalls_exit();
> > +		return -ENOTCONN;
> > +	}
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = (struct sock_mapping *) sock->sk->sk_send_head;
> > +	if (!map) {
> > +		pvcalls_exit();
> > +		return -ENOTSOCK;
> > +	}
> > +
> > +	mutex_lock(&map->active.in_mutex);
> > +	if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
> > +		len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
> > +
> > +	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
> > +		wait_event_interruptible(map->active.inflight_conn_req,
> > +					 pvcalls_front_read_todo(map));
> > +	}
> > +	ret = __read_ring(map->active.ring, &map->active.data,
> > +			  &msg->msg_iter, len, flags);
> > +
> > +	if (ret > 0)
> > +		notify_remote_via_irq(map->active.irq);
> > +	if (ret == 0)
> > +		ret = -EAGAIN;
> 
> Why not 0? The manpage says:
> 
>        EAGAIN or EWOULDBLOCK
>               The  socket  is  marked nonblocking and the receive
> operation would block, or a receive timeout
>               had been set and the timeout expired before data was
> received.  POSIX.1 allows either error  to
>               be  returned  for  this case, and does not require these
> constants to have the same value, so a
>               portable application should check for both possibilities.
> 
> 
> I don't think either of these conditions is true here.
> 
> (Again, should have noticed this earlier, sorry)

In case the socket is MSG_DONTWAIT, then we should return -EAGAIN here.
However, it is true that if the socket is not MSG_DONTWAIT, then
returning 0 would make more sense.

So I'll do:

if (ret == 0)
    ret = (flags & MSG_DONTWAIT) ? -EAGAIN : 0;
Boris Ostrovsky Oct. 20, 2017, 2:43 p.m. UTC | #3
On 10/19/2017 09:38 PM, Stefano Stabellini wrote:
> On Tue, 17 Oct 2017, Boris Ostrovsky wrote:
>>> +
>>> +int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>>> +		     int flags)
>>> +{
>>> +	struct pvcalls_bedata *bedata;
>>> +	int ret;
>>> +	struct sock_mapping *map;
>>> +
>>> +	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
>>> +		return -EOPNOTSUPP;
>>> +
>>> +	pvcalls_enter();
>>> +	if (!pvcalls_front_dev) {
>>> +		pvcalls_exit();
>>> +		return -ENOTCONN;
>>> +	}
>>> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
>>> +
>>> +	map = (struct sock_mapping *) sock->sk->sk_send_head;
>>> +	if (!map) {
>>> +		pvcalls_exit();
>>> +		return -ENOTSOCK;
>>> +	}
>>> +
>>> +	mutex_lock(&map->active.in_mutex);
>>> +	if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
>>> +		len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
>>> +
>>> +	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
>>> +		wait_event_interruptible(map->active.inflight_conn_req,
>>> +					 pvcalls_front_read_todo(map));
>>> +	}
>>> +	ret = __read_ring(map->active.ring, &map->active.data,
>>> +			  &msg->msg_iter, len, flags);
>>> +
>>> +	if (ret > 0)
>>> +		notify_remote_via_irq(map->active.irq);
>>> +	if (ret == 0)
>>> +		ret = -EAGAIN;
>> Why not 0? The manpage says:
>>
>>        EAGAIN or EWOULDBLOCK
>>               The  socket  is  marked nonblocking and the receive
>> operation would block, or a receive timeout
>>               had been set and the timeout expired before data was
>> received.  POSIX.1 allows either error  to
>>               be  returned  for  this case, and does not require these
>> constants to have the same value, so a
>>               portable application should check for both possibilities.
>>
>>
>> I don't think either of these conditions is true here.
>>
>> (Again, should have noticed this earlier, sorry)
> In case the socket is MSG_DONTWAIT, then we should return -EAGAIN here.
> However, it is true that if the socket is not MSG_DONTWAIT, then
> returning 0 would make more sense.
>
> So I'll do:
>
> if (ret == 0)
>     ret = (flags & MSG_DONTWAIT) ? -EAGAIN : 0;

Sure. With that

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index c13c40a..161f88b 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -118,6 +118,20 @@  static bool pvcalls_front_write_todo(struct sock_mapping *map)
 	return !!(size - pvcalls_queued(prod, cons, size));
 }
 
+static bool pvcalls_front_read_todo(struct sock_mapping *map)
+{
+	struct pvcalls_data_intf *intf = map->active.ring;
+	RING_IDX cons, prod;
+	int32_t error;
+
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+	error = intf->in_error;
+	return (error != 0 ||
+		pvcalls_queued(prod, cons,
+			       XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER)) != 0);
+}
+
 static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id)
 {
 	struct xenbus_device *dev = dev_id;
@@ -482,6 +496,100 @@  int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg,
 	return tot_sent;
 }
 
+static int __read_ring(struct pvcalls_data_intf *intf,
+		       struct pvcalls_data *data,
+		       struct iov_iter *msg_iter,
+		       size_t len, int flags)
+{
+	RING_IDX cons, prod, size, masked_prod, masked_cons;
+	RING_IDX array_size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
+	int32_t error;
+
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+	error = intf->in_error;
+	/* get pointers before reading from the ring */
+	virt_rmb();
+	if (error < 0)
+		return error;
+
+	size = pvcalls_queued(prod, cons, array_size);
+	masked_prod = pvcalls_mask(prod, array_size);
+	masked_cons = pvcalls_mask(cons, array_size);
+
+	if (size == 0)
+		return 0;
+
+	if (len > size)
+		len = size;
+
+	if (masked_prod > masked_cons) {
+		copy_to_iter(data->in + masked_cons, len, msg_iter);
+	} else {
+		if (len > (array_size - masked_cons)) {
+			copy_to_iter(data->in + masked_cons,
+				     array_size - masked_cons, msg_iter);
+			copy_to_iter(data->in,
+				     len - (array_size - masked_cons),
+				     msg_iter);
+		} else {
+			copy_to_iter(data->in + masked_cons, len, msg_iter);
+		}
+	}
+	/* read data from the ring before increasing the index */
+	virt_mb();
+	if (!(flags & MSG_PEEK))
+		intf->in_cons += len;
+
+	return len;
+}
+
+int pvcalls_front_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags)
+{
+	struct pvcalls_bedata *bedata;
+	int ret;
+	struct sock_mapping *map;
+
+	if (flags & (MSG_CMSG_CLOEXEC|MSG_ERRQUEUE|MSG_OOB|MSG_TRUNC))
+		return -EOPNOTSUPP;
+
+	pvcalls_enter();
+	if (!pvcalls_front_dev) {
+		pvcalls_exit();
+		return -ENOTCONN;
+	}
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = (struct sock_mapping *) sock->sk->sk_send_head;
+	if (!map) {
+		pvcalls_exit();
+		return -ENOTSOCK;
+	}
+
+	mutex_lock(&map->active.in_mutex);
+	if (len > XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER))
+		len = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
+
+	while (!(flags & MSG_DONTWAIT) && !pvcalls_front_read_todo(map)) {
+		wait_event_interruptible(map->active.inflight_conn_req,
+					 pvcalls_front_read_todo(map));
+	}
+	ret = __read_ring(map->active.ring, &map->active.data,
+			  &msg->msg_iter, len, flags);
+
+	if (ret > 0)
+		notify_remote_via_irq(map->active.irq);
+	if (ret == 0)
+		ret = -EAGAIN;
+	if (ret == -ENOTCONN)
+		ret = 0;
+
+	mutex_unlock(&map->active.in_mutex);
+	pvcalls_exit();
+	return ret;
+}
+
 int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct pvcalls_bedata *bedata;
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index d937c24..de24041 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -16,5 +16,9 @@  int pvcalls_front_accept(struct socket *sock,
 int pvcalls_front_sendmsg(struct socket *sock,
 			  struct msghdr *msg,
 			  size_t len);
+int pvcalls_front_recvmsg(struct socket *sock,
+			  struct msghdr *msg,
+			  size_t len,
+			  int flags);
 
 #endif