Message ID: 1499116119-15638-8-git-send-email-sstabellini@kernel.org (mailing list archive)
State: New, archived
On 03/07/17 23:08, Stefano Stabellini wrote:
> Allocate a socket. Keep track of socket <-> ring mappings with a new data
> structure, called sock_mapping. Implement the connect command by calling
> inet_stream_connect, and mapping the new indexes page and data ring.
> Allocate a workqueue and a work_struct, called ioworker, to perform
> reads and writes to the socket.
>
> When an active socket is closed (sk_state_change), set in_error to
> -ENOTCONN and notify the other end, as specified by the protocol.
>
> sk_data_ready and pvcalls_back_ioworker will be implemented later.
>
> Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
> CC: boris.ostrovsky@oracle.com
> CC: jgross@suse.com
> ---
>  drivers/xen/pvcalls-back.c | 174 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 174 insertions(+)
>
> [...]
>
>  static int pvcalls_back_connect(struct xenbus_device *dev,
>  				struct xen_pvcalls_request *req)
>  {
> +	struct pvcalls_fedata *fedata;
> +	int ret = -EINVAL;
> +	struct socket *sock;
> +	struct sock_mapping *map;
> +	struct xen_pvcalls_response *rsp;
> +
> +	fedata = dev_get_drvdata(&dev->dev);
> +
> +	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
> +	if (ret < 0)
> +		goto out;
> +	ret = inet_stream_connect(sock, (struct sockaddr *)&req->u.connect.addr,
> +				  req->u.connect.len, req->u.connect.flags);

Shouldn't there be some kind of validation, e.g. whether
req->u.connect.len isn't larger than sizeof(req->u.connect.addr)?

Are all flags really valid to specify? I'd like to have at least a
comment stating that everything is safe without validation.


Juergen
On Tue, 4 Jul 2017, Juergen Gross wrote:
> On 03/07/17 23:08, Stefano Stabellini wrote:
> > Allocate a socket. Keep track of socket <-> ring mappings with a new data
> > structure, called sock_mapping. Implement the connect command by calling
> > inet_stream_connect, and mapping the new indexes page and data ring.
> >
> > [...]
> >
> > +	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
> > +	if (ret < 0)
> > +		goto out;
> > +	ret = inet_stream_connect(sock, (struct sockaddr *)&req->u.connect.addr,
> > +				  req->u.connect.len, req->u.connect.flags);
>
> Shouldn't there be some kind of validation, e.g. whether
> req->u.connect.len isn't larger than sizeof(req->u.connect.addr)?

Yes, we should also validate that
(req->u.connect.len >= sizeof(sa->sa_family)). I'll add the checks.

> Are all flags really valid to specify? I'd like to have at least a
> comment stating that everything is safe without validation.

The flags field for the connect operation is actually ignored in this
version of the protocol ("reserved for future usage"). I'll drop it.
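
For reference, a minimal sketch of the validation agreed on above,
reusing the field names from the quoted patch; the exact form that
eventually lands upstream may differ:

	struct sockaddr *sa = (struct sockaddr *)&req->u.connect.addr;

	/*
	 * Hypothetical check, placed before sock_create() so nothing
	 * needs releasing on failure: the frontend-supplied length must
	 * cover at least the address family and must fit within the
	 * storage of the shared request slot.
	 */
	if (req->u.connect.len < sizeof(sa->sa_family) ||
	    req->u.connect.len > sizeof(req->u.connect.addr))
		goto out;	/* ret still holds -EINVAL */

	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
	if (ret < 0)
		goto out;
	/* the protocol reserves the connect flags for future use: pass 0 */
	ret = inet_stream_connect(sock, sa, req->u.connect.len, 0);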
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 53fd908..1bc2620 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -54,6 +54,39 @@ struct pvcalls_fedata {
 	struct semaphore socket_lock;
 };
 
+struct pvcalls_ioworker {
+	struct work_struct register_work;
+	struct workqueue_struct *wq;
+};
+
+struct sock_mapping {
+	struct list_head list;
+	struct pvcalls_fedata *fedata;
+	struct socket *sock;
+	uint64_t id;
+	grant_ref_t ref;
+	struct pvcalls_data_intf *ring;
+	void *bytes;
+	struct pvcalls_data data;
+	uint32_t ring_order;
+	int irq;
+	atomic_t read;
+	atomic_t write;
+	atomic_t io;
+	atomic_t release;
+	void (*saved_data_ready)(struct sock *sk);
+	struct pvcalls_ioworker ioworker;
+};
+
+static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map);
+static int pvcalls_back_release_active(struct xenbus_device *dev,
+				       struct pvcalls_fedata *fedata,
+				       struct sock_mapping *map);
+
+static void pvcalls_back_ioworker(struct work_struct *work)
+{
+}
+
 static int pvcalls_back_socket(struct xenbus_device *dev,
 			       struct xen_pvcalls_request *req)
 {
@@ -82,9 +115,145 @@ static int pvcalls_back_socket(struct xenbus_device *dev,
 	return 0;
 }
 
+static void pvcalls_sk_state_change(struct sock *sock)
+{
+	struct sock_mapping *map = sock->sk_user_data;
+	struct pvcalls_data_intf *intf;
+
+	if (map == NULL)
+		return;
+
+	intf = map->ring;
+	intf->in_error = -ENOTCONN;
+	notify_remote_via_irq(map->irq);
+}
+
+static void pvcalls_sk_data_ready(struct sock *sock)
+{
+}
+
+static struct sock_mapping *pvcalls_new_active_socket(
+		struct pvcalls_fedata *fedata,
+		uint64_t id,
+		grant_ref_t ref,
+		uint32_t evtchn,
+		struct socket *sock)
+{
+	int ret;
+	struct sock_mapping *map;
+	void *page;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (map == NULL)
+		return NULL;
+
+	map->fedata = fedata;
+	map->sock = sock;
+	map->id = id;
+	map->ref = ref;
+
+	ret = xenbus_map_ring_valloc(fedata->dev, &ref, 1, &page);
+	if (ret < 0)
+		goto out;
+	map->ring = page;
+	map->ring_order = map->ring->ring_order;
+	/* first read the order, then map the data ring */
+	virt_rmb();
+	if (map->ring_order > MAX_RING_ORDER) {
+		pr_warn("%s frontend requested ring_order %u, which is > MAX (%u)\n",
+			__func__, map->ring_order, MAX_RING_ORDER);
+		goto out;
+	}
+	ret = xenbus_map_ring_valloc(fedata->dev, map->ring->ref,
+				     (1 << map->ring_order), &page);
+	if (ret < 0)
+		goto out;
+	map->bytes = page;
+
+	ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
+						    evtchn,
+						    pvcalls_back_conn_event,
+						    0,
+						    "pvcalls-backend",
+						    map);
+	if (ret < 0)
+		goto out;
+	map->irq = ret;
+
+	map->data.in = map->bytes;
+	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);
+
+	map->ioworker.wq = alloc_workqueue("pvcalls_io", WQ_UNBOUND, 1);
+	if (!map->ioworker.wq)
+		goto out;
+	atomic_set(&map->io, 1);
+	INIT_WORK(&map->ioworker.register_work, pvcalls_back_ioworker);
+
+	down(&fedata->socket_lock);
+	list_add_tail(&map->list, &fedata->socket_mappings);
+	up(&fedata->socket_lock);
+
+	write_lock_bh(&map->sock->sk->sk_callback_lock);
+	map->saved_data_ready = map->sock->sk->sk_data_ready;
+	map->sock->sk->sk_user_data = map;
+	map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
+	map->sock->sk->sk_state_change = pvcalls_sk_state_change;
+	write_unlock_bh(&map->sock->sk->sk_callback_lock);
+
+	return map;
+out:
+	down(&fedata->socket_lock);
+	list_del(&map->list);
+	pvcalls_back_release_active(fedata->dev, fedata, map);
+	up(&fedata->socket_lock);
+	return NULL;
+}
+
 static int pvcalls_back_connect(struct xenbus_device *dev,
 				struct xen_pvcalls_request *req)
 {
+	struct pvcalls_fedata *fedata;
+	int ret = -EINVAL;
+	struct socket *sock;
+	struct sock_mapping *map;
+	struct xen_pvcalls_response *rsp;
+
+	fedata = dev_get_drvdata(&dev->dev);
+
+	ret = sock_create(AF_INET, SOCK_STREAM, 0, &sock);
+	if (ret < 0)
+		goto out;
+	ret = inet_stream_connect(sock, (struct sockaddr *)&req->u.connect.addr,
+				  req->u.connect.len, req->u.connect.flags);
+	if (ret < 0) {
+		sock_release(sock);
+		goto out;
+	}
+
+	map = pvcalls_new_active_socket(fedata,
+					req->u.connect.id,
+					req->u.connect.ref,
+					req->u.connect.evtchn,
+					sock);
+	if (!map) {
+		ret = -EFAULT;
+		sock_release(sock);
+	}
+
+out:
+	rsp = RING_GET_RESPONSE(&fedata->ring, fedata->ring.rsp_prod_pvt++);
+	rsp->req_id = req->req_id;
+	rsp->cmd = req->cmd;
+	rsp->u.connect.id = req->u.connect.id;
+	rsp->ret = ret;
+
+	return 0;
+}
+
+static int pvcalls_back_release_active(struct xenbus_device *dev,
+				       struct pvcalls_fedata *fedata,
+				       struct sock_mapping *map)
+{
 	return 0;
 }
 
@@ -206,6 +375,11 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
+{
+	return IRQ_HANDLED;
+}
+
 static int backend_connect(struct xenbus_device *dev)
 {
 	int err, evtchn;
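
As an aside, the two xenbus_map_ring_valloc() calls in
pvcalls_new_active_socket() are easier to follow with the resulting
layout spelled out. A sketch of the split, assuming the
XEN_FLEX_RING_SIZE() definition from the Xen ring headers
(1UL << (order + XEN_PAGE_SHIFT - 1)); the in/out naming follows the
frontend's point of view:

	/*
	 * Illustration only: the (1 << ring_order) mapped data pages form
	 * two equal halves. "in" carries bytes headed to the frontend,
	 * "out" carries bytes the frontend has queued for the socket; the
	 * ioworker added by later patches copies between the socket and
	 * these two buffers.
	 *
	 *   map->bytes
	 *   +--------------------+--------------------+
	 *   |      data.in       |      data.out      |
	 *   +--------------------+--------------------+
	 *                        ^
	 *                        map->bytes + XEN_FLEX_RING_SIZE(ring_order)
	 */
	map->data.in = map->bytes;
	map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);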
Allocate a socket. Keep track of socket <-> ring mappings with a new data
structure, called sock_mapping. Implement the connect command by calling
inet_stream_connect, and mapping the new indexes page and data ring.
Allocate a workqueue and a work_struct, called ioworker, to perform
reads and writes to the socket.

When an active socket is closed (sk_state_change), set in_error to
-ENOTCONN and notify the other end, as specified by the protocol.

sk_data_ready and pvcalls_back_ioworker will be implemented later.

Signed-off-by: Stefano Stabellini <stefano@aporeto.com>
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
---
 drivers/xen/pvcalls-back.c | 174 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
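
pvcalls_back_release_active() is only a stub in this patch, but the
saved_data_ready field already hints at what the release path has to
undo. A hedged sketch of that restore, mirroring the setup done under
sk_callback_lock in pvcalls_new_active_socket(); a later patch in the
series provides the real implementation:

	/*
	 * Hypothetical restore on release, the inverse of the setup in
	 * pvcalls_new_active_socket(): detach the mapping and reinstall
	 * the original data-ready callback under the same lock, so the
	 * pvcalls hooks can no longer run on this socket.
	 */
	write_lock_bh(&map->sock->sk->sk_callback_lock);
	map->sock->sk->sk_user_data = NULL;
	map->sock->sk->sk_data_ready = map->saved_data_ready;
	write_unlock_bh(&map->sock->sk->sk_callback_lock);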