diff mbox

[v2,05/13] xen/pvcalls: implement bind command

Message ID 1501017730-12797-5-git-send-email-sstabellini@kernel.org (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini July 25, 2017, 9:22 p.m. UTC
Send PVCALLS_BIND to the backend. Introduce a new structure, part of
struct sock_mapping, to store information specific to passive sockets.

Introduce a status field to keep track of the status of the passive
socket.

Introduce a waitqueue for the "accept" command (see the accept command
implementation): it is used to allow only one outstanding accept
command at any given time and to implement polling on the passive
socket. Introduce a flags field to keep track of in-flight accept and
poll commands.

sock->sk->sk_send_head is not used for IP sockets: reuse the field to
store a pointer to the struct sock_mapping corresponding to the socket.

Convert the struct socket pointer into a uint64_t and use it as the id
for the socket to pass to the backend.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-front.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/pvcalls-front.h |  3 ++
 2 files changed, 76 insertions(+)

Comments

Boris Ostrovsky July 26, 2017, 2:56 p.m. UTC | #1
On 7/25/2017 5:22 PM, Stefano Stabellini wrote:
> Send PVCALLS_BIND to the backend. Introduce a new structure, part of
> struct sock_mapping, to store information specific to passive sockets.
>
> Introduce a status field to keep track of the status of the passive
> socket.
>
> Introduce a waitqueue for the "accept" command (see the accept command
> implementation): it is used to allow only one outstanding accept
> command at any given time and to implement polling on the passive
> socket. Introduce a flags field to keep track of in-flight accept and
> poll commands.
>
> sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> store a pointer to the struct sock_mapping corresponding to the socket.
>
> Convert the struct socket pointer into an uint64_t and use it as id for
> the socket to pass to the backend.
>
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>   drivers/xen/pvcalls-front.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
>   drivers/xen/pvcalls-front.h |  3 ++
>   2 files changed, 76 insertions(+)
>
> diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> index d0f5f42..af2ce20 100644
> --- a/drivers/xen/pvcalls-front.c
> +++ b/drivers/xen/pvcalls-front.c
> @@ -59,6 +59,23 @@ struct sock_mapping {
>   
>   			wait_queue_head_t inflight_conn_req;
>   		} active;
> +		struct {
> +		/* Socket status */
> +#define PVCALLS_STATUS_UNINITALIZED  0
> +#define PVCALLS_STATUS_BIND          1
> +#define PVCALLS_STATUS_LISTEN        2
> +			uint8_t status;
> +		/*
> +		 * Internal state-machine flags.
> +		 * Only one accept operation can be inflight for a socket.
> +		 * Only one poll operation can be inflight for a given socket.
> +		 */
> +#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> +#define PVCALLS_FLAG_POLL_RET        2
> +			uint8_t flags;
> +			wait_queue_head_t inflight_accept_req;
> +		} passive;
>   	};
>   };
>   
> @@ -292,6 +309,62 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>   	return ret;
>   }
>   
> +int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
> +{
> +	struct pvcalls_bedata *bedata;
> +	struct sock_mapping *map = NULL;
> +	struct xen_pvcalls_request *req;
> +	int notify, req_id, ret;
> +
> +	if (!pvcalls_front_dev)
> +		return -ENOTCONN;
> +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> +		return -ENOTSUPP;
> +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> +
> +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> +	if (map == NULL)
> +		return -ENOMEM;
> +
> +	spin_lock(&bedata->pvcallss_lock);
> +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> +	if (RING_FULL(&bedata->ring) ||
> +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> +		kfree(map);
> +		spin_unlock(&bedata->pvcallss_lock);
> +		return -EAGAIN;
> +	}
> +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> +	req->req_id = req_id;
> +	map->sock = sock;
> +	req->cmd = PVCALLS_BIND;
> +	req->u.bind.id = (uint64_t) sock;
> +	memcpy(req->u.bind.addr, addr, sizeof(*addr));
> +	req->u.bind.len = addr_len;
> +
> +	init_waitqueue_head(&map->passive.inflight_accept_req);
> +
> +	list_add_tail(&map->list, &bedata->socketpass_mappings);
> +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> +	map->active_socket = false;
> +
> +	bedata->ring.req_prod_pvt++;
> +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> +	spin_unlock(&bedata->pvcallss_lock);
> +	if (notify)
> +		notify_remote_via_irq(bedata->irq);
> +
> +	wait_event(bedata->inflight_req,
> +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);

This all looks very similar to previous patches. Can it be factored out?

Also, you've used wait_event_interruptible in socket() implementation. 
Why not here (and connect())?

-boris

> +
> +	map->passive.status = PVCALLS_STATUS_BIND;
> +	ret = bedata->rsp[req_id].ret;
> +	/* read ret, then set this rsp slot to be reused */
> +	smp_mb();
> +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> +	return 0;
> +}
> +
>   static const struct xenbus_device_id pvcalls_front_ids[] = {
>   	{ "pvcalls" },
>   	{ "" }
> diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> index 63b0417..8b0a274 100644
> --- a/drivers/xen/pvcalls-front.h
> +++ b/drivers/xen/pvcalls-front.h
> @@ -6,5 +6,8 @@
>   int pvcalls_front_socket(struct socket *sock);
>   int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
>   			  int addr_len, int flags);
> +int pvcalls_front_bind(struct socket *sock,
> +		       struct sockaddr *addr,
> +		       int addr_len);
>   
>   #endif
Stefano Stabellini July 26, 2017, 11:59 p.m. UTC | #2
On Wed, 26 Jul 2017, Boris Ostrovsky wrote:
> On 7/25/2017 5:22 PM, Stefano Stabellini wrote:
> > Send PVCALLS_BIND to the backend. Introduce a new structure, part of
> > struct sock_mapping, to store information specific to passive sockets.
> > 
> > Introduce a status field to keep track of the status of the passive
> > socket.
> > 
> > Introduce a waitqueue for the "accept" command (see the accept command
> > implementation): it is used to allow only one outstanding accept
> > command at any given time and to implement polling on the passive
> > socket. Introduce a flags field to keep track of in-flight accept and
> > poll commands.
> > 
> > sock->sk->sk_send_head is not used for ip sockets: reuse the field to
> > store a pointer to the struct sock_mapping corresponding to the socket.
> > 
> > Convert the struct socket pointer into an uint64_t and use it as id for
> > the socket to pass to the backend.
> > 
> > Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> > CC: boris.ostrovsky@oracle.com
> > CC: jgross@suse.com
> > ---
> >   drivers/xen/pvcalls-front.c | 73
> > +++++++++++++++++++++++++++++++++++++++++++++
> >   drivers/xen/pvcalls-front.h |  3 ++
> >   2 files changed, 76 insertions(+)
> > 
> > diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
> > index d0f5f42..af2ce20 100644
> > --- a/drivers/xen/pvcalls-front.c
> > +++ b/drivers/xen/pvcalls-front.c
> > @@ -59,6 +59,23 @@ struct sock_mapping {
> >     			wait_queue_head_t inflight_conn_req;
> >   		} active;
> > +		struct {
> > +		/* Socket status */
> > +#define PVCALLS_STATUS_UNINITALIZED  0
> > +#define PVCALLS_STATUS_BIND          1
> > +#define PVCALLS_STATUS_LISTEN        2
> > +			uint8_t status;
> > +		/*
> > +		 * Internal state-machine flags.
> > +		 * Only one accept operation can be inflight for a socket.
> > +		 * Only one poll operation can be inflight for a given socket.
> > +		 */
> > +#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
> > +#define PVCALLS_FLAG_POLL_INFLIGHT   1
> > +#define PVCALLS_FLAG_POLL_RET        2
> > +			uint8_t flags;
> > +			wait_queue_head_t inflight_accept_req;
> > +		} passive;
> >   	};
> >   };
> >   @@ -292,6 +309,62 @@ int pvcalls_front_connect(struct socket *sock, struct
> > sockaddr *addr,
> >   	return ret;
> >   }
> >   +int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int
> > addr_len)
> > +{
> > +	struct pvcalls_bedata *bedata;
> > +	struct sock_mapping *map = NULL;
> > +	struct xen_pvcalls_request *req;
> > +	int notify, req_id, ret;
> > +
> > +	if (!pvcalls_front_dev)
> > +		return -ENOTCONN;
> > +	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
> > +		return -ENOTSUPP;
> > +	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
> > +
> > +	map = kzalloc(sizeof(*map), GFP_KERNEL);
> > +	if (map == NULL)
> > +		return -ENOMEM;
> > +
> > +	spin_lock(&bedata->pvcallss_lock);
> > +	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> > +	if (RING_FULL(&bedata->ring) ||
> > +	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
> > +		kfree(map);
> > +		spin_unlock(&bedata->pvcallss_lock);
> > +		return -EAGAIN;
> > +	}
> > +	req = RING_GET_REQUEST(&bedata->ring, req_id);
> > +	req->req_id = req_id;
> > +	map->sock = sock;
> > +	req->cmd = PVCALLS_BIND;
> > +	req->u.bind.id = (uint64_t) sock;
> > +	memcpy(req->u.bind.addr, addr, sizeof(*addr));
> > +	req->u.bind.len = addr_len;
> > +
> > +	init_waitqueue_head(&map->passive.inflight_accept_req);
> > +
> > +	list_add_tail(&map->list, &bedata->socketpass_mappings);
> > +	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
> > +	map->active_socket = false;
> > +
> > +	bedata->ring.req_prod_pvt++;
> > +	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
> > +	spin_unlock(&bedata->pvcallss_lock);
> > +	if (notify)
> > +		notify_remote_via_irq(bedata->irq);
> > +
> > +	wait_event(bedata->inflight_req,
> > +		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
> 
> This all looks very similar to previous patches. Can it be factored out?

You are right that the pattern is the same for all commands:
- get a request
- fill the request
- possibly do something else
- wait
however each request is different, the struct and fields are different.
There are spin_lock and spin_unlock calls intermingled. I am not sure I
can factor out much of this. Maybe I could create a static inline or
macro as a syntactic sugar to replace the wait call, but that's pretty
much it I think.


> Also, you've used wait_event_interruptible in socket() implementation. Why not
> here (and connect())?

My intention was to use wait_event to wait for replies everywhere but I
missed some of them in the conversion (I used to use
wait_event_interruptible in early versions of the code).

The reason to use wait_event is that it makes it easier to handle the
rsp slot in bedata (bedata->rsp[req_id]): in case of EINTR the response
in bedata->rsp would not be cleared by anybody. If we use wait_event
there is no such problem, and the backend could still return EINTR and
we would handle it just fine, like any other response.

I'll make sure to use wait_event to wait for a response (like here), and
wait_event_interruptible elsewhere (like in recvmsg, where we don't risk
leaking a rsp slot).

 
> > +
> > +	map->passive.status = PVCALLS_STATUS_BIND;
> > +	ret = bedata->rsp[req_id].ret;
> > +	/* read ret, then set this rsp slot to be reused */
> > +	smp_mb();
> > +	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
> > +	return 0;
> > +}
> > +
> >   static const struct xenbus_device_id pvcalls_front_ids[] = {
> >   	{ "pvcalls" },
> >   	{ "" }
> > diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
> > index 63b0417..8b0a274 100644
> > --- a/drivers/xen/pvcalls-front.h
> > +++ b/drivers/xen/pvcalls-front.h
> > @@ -6,5 +6,8 @@
> >   int pvcalls_front_socket(struct socket *sock);
> >   int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
> >   			  int addr_len, int flags);
> > +int pvcalls_front_bind(struct socket *sock,
> > +		       struct sockaddr *addr,
> > +		       int addr_len);
> >     #endif
Boris Ostrovsky July 27, 2017, 2:43 p.m. UTC | #3
>> This all looks very similar to previous patches. Can it be factored out?
> You are right that the pattern is the same for all commands:
> - get a request
> - fill the request
> - possibly do something else
> - wait
> however each request is different, the struct and fields are different.
> There are spin_lock and spin_unlock calls intermingled. I am not sure I
> can factor out much of this. Maybe I could create a static inline or
> macro as a syntactic sugar to replace the wait call, but that's pretty
> much it I think.

Maybe you could factor out common fragments, not necessarily the whole
thing at once?

For example,

static inline int get_request(struct pvcalls_bedata *bedata, int *req_id)
{
	*req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
	if (RING_FULL(&bedata->ring) ||
	    READ_ONCE(bedata->rsp[*req_id].req_id) != PVCALLS_INVALID_ID)
		return -EAGAIN;
	return 0;
}

(or some such)


>
>
>> Also, you've used wait_event_interruptible in socket() implementation. Why not
>> here (and connect())?
> My intention was to use wait_event to wait for replies everywhere but I
> missed some of them in the conversion (I used to use
> wait_event_interruptible in early versions of the code).
>
> The reason to use wait_event is that it makes it easier to handle the
> rsp slot in bedata (bedata->rsp[req_id]): in case of EINTR the response
> in bedata->rsp would not be cleared by anybody. If we use wait_event
> there is no such problem, and the backend could still return EINTR and
> we would handle it just fine as any other responses.

I was actually wondering about this myself when I was looking at
socket() but then I somehow convinced myself (incorrectly!) that it was OK.

-boris
Stefano Stabellini July 31, 2017, 10:17 p.m. UTC | #4
On Thu, 27 Jul 2017, Boris Ostrovsky wrote:
> >> This all looks very similar to previous patches. Can it be factored out?
> > You are right that the pattern is the same for all commands:
> > - get a request
> > - fill the request
> > - possibly do something else
> > - wait
> > however each request is different, the struct and fields are different.
> > There are spin_lock and spin_unlock calls intermingled. I am not sure I
> > can factor out much of this. Maybe I could create a static inline or
> > macro as a syntactic sugar to replace the wait call, but that's pretty
> > much it I think.
> 
> Maybe you could factor out common fragments, not necessarily the whole
> thing at once?
> 
> For example,
> 
> static inline int get_request(*bedata, int *req_id)
> {
> 
> 	*req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
> 	if (RING_FULL(&bedata->ring) ||
> 	    READ_ONCE(bedata->rsp[*req_id].req_id) != PVCALLS_INVALID_ID) {
> 		return -EAGAIN;
> 	return 0;
> }
> 
> (or some such)

You are right, the code looks better this way. I'll add it.
diff mbox

Patch

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index d0f5f42..af2ce20 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -59,6 +59,23 @@  struct sock_mapping {
 
 			wait_queue_head_t inflight_conn_req;
 		} active;
+		struct {
+		/* Socket status */
+#define PVCALLS_STATUS_UNINITALIZED  0
+#define PVCALLS_STATUS_BIND          1
+#define PVCALLS_STATUS_LISTEN        2
+			uint8_t status;
+		/*
+		 * Internal state-machine flags.
+		 * Only one accept operation can be inflight for a socket.
+		 * Only one poll operation can be inflight for a given socket.
+		 */
+#define PVCALLS_FLAG_ACCEPT_INFLIGHT 0
+#define PVCALLS_FLAG_POLL_INFLIGHT   1
+#define PVCALLS_FLAG_POLL_RET        2
+			uint8_t flags;
+			wait_queue_head_t inflight_accept_req;
+		} passive;
 	};
 };
 
@@ -292,6 +309,62 @@  int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 	return ret;
 }
 
+int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct pvcalls_bedata *bedata;
+	struct sock_mapping *map = NULL;
+	struct xen_pvcalls_request *req;
+	int notify, req_id, ret;
+
+	if (!pvcalls_front_dev)
+		return -ENOTCONN;
+	if (addr->sa_family != AF_INET || sock->type != SOCK_STREAM)
+		return -ENOTSUPP;
+	bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL)
+		return -ENOMEM;
+
+	spin_lock(&bedata->pvcallss_lock);
+	req_id = bedata->ring.req_prod_pvt & (RING_SIZE(&bedata->ring) - 1);
+	if (RING_FULL(&bedata->ring) ||
+	    READ_ONCE(bedata->rsp[req_id].req_id) != PVCALLS_INVALID_ID) {
+		kfree(map);
+		spin_unlock(&bedata->pvcallss_lock);
+		return -EAGAIN;
+	}
+	req = RING_GET_REQUEST(&bedata->ring, req_id);
+	req->req_id = req_id;
+	map->sock = sock;
+	req->cmd = PVCALLS_BIND;
+	req->u.bind.id = (uint64_t) sock;
+	memcpy(req->u.bind.addr, addr, sizeof(*addr));
+	req->u.bind.len = addr_len;
+
+	init_waitqueue_head(&map->passive.inflight_accept_req);
+
+	list_add_tail(&map->list, &bedata->socketpass_mappings);
+	WRITE_ONCE(sock->sk->sk_send_head, (void *)map);
+	map->active_socket = false;
+
+	bedata->ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&bedata->ring, notify);
+	spin_unlock(&bedata->pvcallss_lock);
+	if (notify)
+		notify_remote_via_irq(bedata->irq);
+
+	wait_event(bedata->inflight_req,
+		   READ_ONCE(bedata->rsp[req_id].req_id) == req_id);
+
+	map->passive.status = PVCALLS_STATUS_BIND;
+	ret = bedata->rsp[req_id].ret;
+	/* read ret, then set this rsp slot to be reused */
+	smp_mb();
+	WRITE_ONCE(bedata->rsp[req_id].req_id, PVCALLS_INVALID_ID);
+	return 0;
+}
+
 static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "pvcalls" },
 	{ "" }
diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h
index 63b0417..8b0a274 100644
--- a/drivers/xen/pvcalls-front.h
+++ b/drivers/xen/pvcalls-front.h
@@ -6,5 +6,8 @@ 
 int pvcalls_front_socket(struct socket *sock);
 int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
 			  int addr_len, int flags);
+int pvcalls_front_bind(struct socket *sock,
+		       struct sockaddr *addr,
+		       int addr_len);
 
 #endif