diff mbox

[v3,16/18] xen/pvcalls: implement read

Message ID 1496431915-20774-16-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini June 2, 2017, 7:31 p.m. UTC
When an active socket has data available, increment the io and read
counters, and schedule the ioworker.

Implement the read function by reading from the socket, writing the data
to the data ring.

Set in_error on error.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-back.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

Comments

Jürgen Groß June 13, 2017, 7:56 a.m. UTC | #1
On 02/06/17 21:31, Stefano Stabellini wrote:
> When an active socket has data available, increment the io and read
> counters, and schedule the ioworker.
> 
> Implement the read function by reading from the socket, writing the data
> to the data ring.
> 
> Set in_error on error.
> 
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-back.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 85 insertions(+)
> 
> diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
> index 0283d49..e7d2b85 100644
> --- a/drivers/xen/pvcalls-back.c
> +++ b/drivers/xen/pvcalls-back.c
> @@ -101,6 +101,81 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
>  
>  static void pvcalls_conn_back_read(unsigned long opaque)
>  {
> +	struct sock_mapping *map = (struct sock_mapping *)opaque;
> +	struct msghdr msg;
> +	struct kvec vec[2];
> +	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
> +	int32_t error;
> +	struct pvcalls_data_intf *intf = map->ring;
> +	struct pvcalls_data *data = &map->data;
> +	unsigned long flags;
> +	int ret;
> +
> +	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
> +	cons = intf->in_cons;
> +	prod = intf->in_prod;
> +	error = intf->in_error;
> +	/* read the indexes first, then deal with the data */
> +	virt_mb();
> +
> +	if (error)
> +		return;
> +
> +	size = pvcalls_queued(prod, cons, array_size);
> +	if (size >= array_size)
> +		return;
> +	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
> +	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
> +		atomic_set(&map->read, 0);
> +		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
> +				flags);
> +		return;
> +	}
> +	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
> +	wanted = array_size - size;
> +	masked_prod = pvcalls_mask(prod, array_size);
> +	masked_cons = pvcalls_mask(cons, array_size);
> +
> +	memset(&msg, 0, sizeof(msg));
> +	msg.msg_iter.type = ITER_KVEC|WRITE;
> +	msg.msg_iter.count = wanted;
> +	if (masked_prod < masked_cons) {
> +		vec[0].iov_base = data->in + masked_prod;
> +		vec[0].iov_len = wanted;
> +		msg.msg_iter.kvec = vec;
> +		msg.msg_iter.nr_segs = 1;
> +	} else {
> +		vec[0].iov_base = data->in + masked_prod;
> +		vec[0].iov_len = array_size - masked_prod;
> +		vec[1].iov_base = data->in;
> +		vec[1].iov_len = wanted - vec[0].iov_len;
> +		msg.msg_iter.kvec = vec;
> +		msg.msg_iter.nr_segs = 2;
> +	}
> +
> +	atomic_set(&map->read, 0);
> +	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
> +	WARN_ON(ret > 0 && ret > wanted);

wanted is always > 0, so you can omit the ret > 0 test.


Juergen

> +	if (ret == -EAGAIN) /* shouldn't happen */
> +		return;
> +	if (!ret)
> +		ret = -ENOTCONN;
> +	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
> +	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
> +		atomic_inc(&map->read);
> +	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
> +
> +	/* write the data, then modify the indexes */
> +	virt_wmb();
> +	if (ret < 0)
> +		intf->in_error = ret;
> +	else
> +		intf->in_prod = prod + ret;
> +	/* update the indexes, then notify the other end */
> +	virt_wmb();
> +	notify_remote_via_irq(map->irq);
> +
> +	return;
>  }
>  
>  static int pvcalls_conn_back_write(struct sock_mapping *map)
> @@ -173,6 +248,16 @@ static void pvcalls_sk_state_change(struct sock *sock)
>  
>  static void pvcalls_sk_data_ready(struct sock *sock)
>  {
> +	struct sock_mapping *map = sock->sk_user_data;
> +	struct pvcalls_ioworker *iow;
> +
> +	if (map == NULL)
> +		return;
> +
> +	iow = &map->ioworker;
> +	atomic_inc(&map->read);
> +	atomic_inc(&map->io);
> +	queue_work_on(iow->cpu, iow->wq, &iow->register_work);
>  }
>  
>  static struct sock_mapping *pvcalls_new_active_socket(
>
Stefano Stabellini June 14, 2017, 12:57 a.m. UTC | #2
On Tue, 13 Jun 2017, Juergen Gross wrote:
> On 02/06/17 21:31, Stefano Stabellini wrote:
> > When an active socket has data available, increment the io and read
> > counters, and schedule the ioworker.
> > 
> > Implement the read function by reading from the socket, writing the data
> > to the data ring.
> > 
> > Set in_error on error.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >  drivers/xen/pvcalls-back.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 85 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
> > index 0283d49..e7d2b85 100644
> > --- a/drivers/xen/pvcalls-back.c
> > +++ b/drivers/xen/pvcalls-back.c
> > @@ -101,6 +101,81 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
> >  
> >  static void pvcalls_conn_back_read(unsigned long opaque)
> >  {
> > +	struct sock_mapping *map = (struct sock_mapping *)opaque;
> > +	struct msghdr msg;
> > +	struct kvec vec[2];
> > +	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
> > +	int32_t error;
> > +	struct pvcalls_data_intf *intf = map->ring;
> > +	struct pvcalls_data *data = &map->data;
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
> > +	cons = intf->in_cons;
> > +	prod = intf->in_prod;
> > +	error = intf->in_error;
> > +	/* read the indexes first, then deal with the data */
> > +	virt_mb();
> > +
> > +	if (error)
> > +		return;
> > +
> > +	size = pvcalls_queued(prod, cons, array_size);
> > +	if (size >= array_size)
> > +		return;
> > +	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
> > +	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
> > +		atomic_set(&map->read, 0);
> > +		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
> > +				flags);
> > +		return;
> > +	}
> > +	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
> > +	wanted = array_size - size;
> > +	masked_prod = pvcalls_mask(prod, array_size);
> > +	masked_cons = pvcalls_mask(cons, array_size);
> > +
> > +	memset(&msg, 0, sizeof(msg));
> > +	msg.msg_iter.type = ITER_KVEC|WRITE;
> > +	msg.msg_iter.count = wanted;
> > +	if (masked_prod < masked_cons) {
> > +		vec[0].iov_base = data->in + masked_prod;
> > +		vec[0].iov_len = wanted;
> > +		msg.msg_iter.kvec = vec;
> > +		msg.msg_iter.nr_segs = 1;
> > +	} else {
> > +		vec[0].iov_base = data->in + masked_prod;
> > +		vec[0].iov_len = array_size - masked_prod;
> > +		vec[1].iov_base = data->in;
> > +		vec[1].iov_len = wanted - vec[0].iov_len;
> > +		msg.msg_iter.kvec = vec;
> > +		msg.msg_iter.nr_segs = 2;
> > +	}
> > +
> > +	atomic_set(&map->read, 0);
> > +	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
> > +	WARN_ON(ret > 0 && ret > wanted);
> 
> wanted is always > 0, so you can omit the ret > 0 test.

Good point


> 
> > +	if (ret == -EAGAIN) /* shouldn't happen */
> > +		return;
> > +	if (!ret)
> > +		ret = -ENOTCONN;
> > +	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
> > +	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
> > +		atomic_inc(&map->read);
> > +	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
> > +
> > +	/* write the data, then modify the indexes */
> > +	virt_wmb();
> > +	if (ret < 0)
> > +		intf->in_error = ret;
> > +	else
> > +		intf->in_prod = prod + ret;
> > +	/* update the indexes, then notify the other end */
> > +	virt_wmb();
> > +	notify_remote_via_irq(map->irq);
> > +
> > +	return;
> >  }
> >  
> >  static int pvcalls_conn_back_write(struct sock_mapping *map)
> > @@ -173,6 +248,16 @@ static void pvcalls_sk_state_change(struct sock *sock)
> >  
> >  static void pvcalls_sk_data_ready(struct sock *sock)
> >  {
> > +	struct sock_mapping *map = sock->sk_user_data;
> > +	struct pvcalls_ioworker *iow;
> > +
> > +	if (map == NULL)
> > +		return;
> > +
> > +	iow = &map->ioworker;
> > +	atomic_inc(&map->read);
> > +	atomic_inc(&map->io);
> > +	queue_work_on(iow->cpu, iow->wq, &iow->register_work);
> >  }
> >  
> >  static struct sock_mapping *pvcalls_new_active_socket(
> > 
>
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 0283d49..e7d2b85 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -101,6 +101,81 @@  static int pvcalls_back_release_active(struct xenbus_device *dev,
 
 static void pvcalls_conn_back_read(unsigned long opaque)
 {
+	struct sock_mapping *map = (struct sock_mapping *)opaque;
+	struct msghdr msg;
+	struct kvec vec[2];
+	RING_IDX cons, prod, size, wanted, array_size, masked_prod, masked_cons;
+	int32_t error;
+	struct pvcalls_data_intf *intf = map->ring;
+	struct pvcalls_data *data = &map->data;
+	unsigned long flags;
+	int ret;
+
+	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+	error = intf->in_error;
+	/* read the indexes first, then deal with the data */
+	virt_mb();
+
+	if (error)
+		return;
+
+	size = pvcalls_queued(prod, cons, array_size);
+	if (size >= array_size)
+		return;
+	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
+	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
+		atomic_set(&map->read, 0);
+		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
+				flags);
+		return;
+	}
+	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
+	wanted = array_size - size;
+	masked_prod = pvcalls_mask(prod, array_size);
+	masked_cons = pvcalls_mask(cons, array_size);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iter.type = ITER_KVEC|WRITE;
+	msg.msg_iter.count = wanted;
+	if (masked_prod < masked_cons) {
+		vec[0].iov_base = data->in + masked_prod;
+		vec[0].iov_len = wanted;
+		msg.msg_iter.kvec = vec;
+		msg.msg_iter.nr_segs = 1;
+	} else {
+		vec[0].iov_base = data->in + masked_prod;
+		vec[0].iov_len = array_size - masked_prod;
+		vec[1].iov_base = data->in;
+		vec[1].iov_len = wanted - vec[0].iov_len;
+		msg.msg_iter.kvec = vec;
+		msg.msg_iter.nr_segs = 2;
+	}
+
+	atomic_set(&map->read, 0);
+	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
+	WARN_ON(ret > 0 && ret > wanted);
+	if (ret == -EAGAIN) /* shouldn't happen */
+		return;
+	if (!ret)
+		ret = -ENOTCONN;
+	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
+	if (ret > 0 && !skb_queue_empty(&map->sock->sk->sk_receive_queue))
+		atomic_inc(&map->read);
+	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
+
+	/* write the data, then modify the indexes */
+	virt_wmb();
+	if (ret < 0)
+		intf->in_error = ret;
+	else
+		intf->in_prod = prod + ret;
+	/* update the indexes, then notify the other end */
+	virt_wmb();
+	notify_remote_via_irq(map->irq);
+
+	return;
 }
 
 static int pvcalls_conn_back_write(struct sock_mapping *map)
@@ -173,6 +248,16 @@  static void pvcalls_sk_state_change(struct sock *sock)
 
 static void pvcalls_sk_data_ready(struct sock *sock)
 {
+	struct sock_mapping *map = sock->sk_user_data;
+	struct pvcalls_ioworker *iow;
+
+	if (map == NULL)
+		return;
+
+	iow = &map->ioworker;
+	atomic_inc(&map->read);
+	atomic_inc(&map->io);
+	queue_work_on(iow->cpu, iow->wq, &iow->register_work);
 }
 
 static struct sock_mapping *pvcalls_new_active_socket(