Message ID | 1507336227-20477-9-git-send-email-sstabellini@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> +static int __write_ring(struct pvcalls_data_intf *intf, > + struct pvcalls_data *data, > + struct iov_iter *msg_iter, > + int len) > +{ > + RING_IDX cons, prod, size, masked_prod, masked_cons; > + RING_IDX array_size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); > + int32_t error; > + > + error = intf->out_error; > + if (error < 0) > + return error; > + cons = intf->out_cons; > + prod = intf->out_prod; > + /* read indexes before continuing */ > + virt_mb(); > + > + size = pvcalls_queued(prod, cons, array_size); > + if (size >= array_size) > + return 0; I thought you were going to return an error here? If this can only be due to someone messing up indexes is there a reason to continue trying to write? What are the chances that the index will get corrected? -boris > + if (len > array_size - size) > + len = array_size - size; > + > + masked_prod = pvcalls_mask(prod, array_size); > + masked_cons = pvcalls_mask(cons, array_size); > + > + if (masked_prod < masked_cons) { > + copy_from_iter(data->out + masked_prod, len, msg_iter); > + } else { > + if (len > array_size - masked_prod) { > + copy_from_iter(data->out + masked_prod, > + array_size - masked_prod, msg_iter); > + copy_from_iter(data->out, > + len - (array_size - masked_prod), > + msg_iter); > + } else { > + copy_from_iter(data->out + masked_prod, len, msg_iter); > + } > + } > + /* write to ring before updating pointer */ > + virt_wmb(); > + intf->out_prod += len; > + > + return len; > +}
On Tue, 17 Oct 2017, Boris Ostrovsky wrote: > > +static int __write_ring(struct pvcalls_data_intf *intf, > > + struct pvcalls_data *data, > > + struct iov_iter *msg_iter, > > + int len) > > +{ > > + RING_IDX cons, prod, size, masked_prod, masked_cons; > > + RING_IDX array_size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); > > + int32_t error; > > + > > + error = intf->out_error; > > + if (error < 0) > > + return error; > > + cons = intf->out_cons; > > + prod = intf->out_prod; > > + /* read indexes before continuing */ > > + virt_mb(); > > + > > + size = pvcalls_queued(prod, cons, array_size); > > + if (size >= array_size) > > + return 0; > > > I thought you were going to return an error here? If this can only be > due to someone messing up indexes is there a reason to continue trying > to write? What are the chances that the index will get corrected? Sorry, I forgot. I'll change it to return an error, maybe EFAULT. > > + if (len > array_size - size) > > + len = array_size - size; > > + > > + masked_prod = pvcalls_mask(prod, array_size); > > + masked_cons = pvcalls_mask(cons, array_size); > > + > > + if (masked_prod < masked_cons) { > > + copy_from_iter(data->out + masked_prod, len, msg_iter); > > + } else { > > + if (len > array_size - masked_prod) { > > + copy_from_iter(data->out + masked_prod, > > + array_size - masked_prod, msg_iter); > > + copy_from_iter(data->out, > > + len - (array_size - masked_prod), > > + msg_iter); > > + } else { > > + copy_from_iter(data->out + masked_prod, len, msg_iter); > > + } > > + } > > + /* write to ring before updating pointer */ > > + virt_wmb(); > > + intf->out_prod += len; > > + > > + return len; > > +} >
On 10/19/2017 09:41 PM, Stefano Stabellini wrote: > On Tue, 17 Oct 2017, Boris Ostrovsky wrote: >>> +static int __write_ring(struct pvcalls_data_intf *intf, >>> + struct pvcalls_data *data, >>> + struct iov_iter *msg_iter, >>> + int len) >>> +{ >>> + RING_IDX cons, prod, size, masked_prod, masked_cons; >>> + RING_IDX array_size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); >>> + int32_t error; >>> + >>> + error = intf->out_error; >>> + if (error < 0) >>> + return error; >>> + cons = intf->out_cons; >>> + prod = intf->out_prod; >>> + /* read indexes before continuing */ >>> + virt_mb(); >>> + >>> + size = pvcalls_queued(prod, cons, array_size); >>> + if (size >= array_size) >>> + return 0; >> >> I thought you were going to return an error here? If this can only be >> due to someone messing up indexes is there a reason to continue trying >> to write? What are the chances that the index will get corrected? > Sorry, I forgot. I'll change it to return an error, maybe EFAULT. I think EINVAL might be more appropriate. But either way you can tack on my R-b to the patch. -boris > > >>> + if (len > array_size - size) >>> + len = array_size - size; >>> + >>> + masked_prod = pvcalls_mask(prod, array_size); >>> + masked_cons = pvcalls_mask(cons, array_size); >>> + >>> + if (masked_prod < masked_cons) { >>> + copy_from_iter(data->out + masked_prod, len, msg_iter); >>> + } else { >>> + if (len > array_size - masked_prod) { >>> + copy_from_iter(data->out + masked_prod, >>> + array_size - masked_prod, msg_iter); >>> + copy_from_iter(data->out, >>> + len - (array_size - masked_prod), >>> + msg_iter); >>> + } else { >>> + copy_from_iter(data->out + masked_prod, len, msg_iter); >>> + } >>> + } >>> + /* write to ring before updating pointer */ >>> + virt_wmb(); >>> + intf->out_prod += len; >>> + >>> + return len; >>> +}
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 8958e74..c13c40a 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -29,6 +29,7 @@ #define PVCALLS_INVALID_ID UINT_MAX #define PVCALLS_RING_ORDER XENBUS_MAX_RING_GRANT_ORDER #define PVCALLS_NR_REQ_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE) +#define PVCALLS_FRONT_MAX_SPIN 5000 struct pvcalls_bedata { struct xen_pvcalls_front_ring ring; @@ -100,6 +101,23 @@ static inline int get_request(struct pvcalls_bedata *bedata, int *req_id) return 0; } +static bool pvcalls_front_write_todo(struct sock_mapping *map) +{ + struct pvcalls_data_intf *intf = map->active.ring; + RING_IDX cons, prod, size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); + int32_t error; + + error = intf->out_error; + if (error == -ENOTCONN) + return false; + if (error != 0) + return true; + + cons = intf->out_cons; + prod = intf->out_prod; + return !!(size - pvcalls_queued(prod, cons, size)); +} + static irqreturn_t pvcalls_front_event_handler(int irq, void *dev_id) { struct xenbus_device *dev = dev_id; @@ -364,6 +382,106 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr, return ret; } +static int __write_ring(struct pvcalls_data_intf *intf, + struct pvcalls_data *data, + struct iov_iter *msg_iter, + int len) +{ + RING_IDX cons, prod, size, masked_prod, masked_cons; + RING_IDX array_size = XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER); + int32_t error; + + error = intf->out_error; + if (error < 0) + return error; + cons = intf->out_cons; + prod = intf->out_prod; + /* read indexes before continuing */ + virt_mb(); + + size = pvcalls_queued(prod, cons, array_size); + if (size >= array_size) + return 0; + if (len > array_size - size) + len = array_size - size; + + masked_prod = pvcalls_mask(prod, array_size); + masked_cons = pvcalls_mask(cons, array_size); + + if (masked_prod < masked_cons) { + copy_from_iter(data->out + masked_prod, len, msg_iter); + } else { + if (len > array_size 
- masked_prod) { + copy_from_iter(data->out + masked_prod, + array_size - masked_prod, msg_iter); + copy_from_iter(data->out, + len - (array_size - masked_prod), + msg_iter); + } else { + copy_from_iter(data->out + masked_prod, len, msg_iter); + } + } + /* write to ring before updating pointer */ + virt_wmb(); + intf->out_prod += len; + + return len; +} + +int pvcalls_front_sendmsg(struct socket *sock, struct msghdr *msg, + size_t len) +{ + struct pvcalls_bedata *bedata; + struct sock_mapping *map; + int sent, tot_sent = 0; + int count = 0, flags; + + flags = msg->msg_flags; + if (flags & (MSG_CONFIRM|MSG_DONTROUTE|MSG_EOR|MSG_OOB)) + return -EOPNOTSUPP; + + pvcalls_enter(); + if (!pvcalls_front_dev) { + pvcalls_exit(); + return -ENOTCONN; + } + bedata = dev_get_drvdata(&pvcalls_front_dev->dev); + + map = (struct sock_mapping *) sock->sk->sk_send_head; + if (!map) { + pvcalls_exit(); + return -ENOTSOCK; + } + + mutex_lock(&map->active.out_mutex); + if ((flags & MSG_DONTWAIT) && !pvcalls_front_write_todo(map)) { + mutex_unlock(&map->active.out_mutex); + pvcalls_exit(); + return -EAGAIN; + } + if (len > INT_MAX) + len = INT_MAX; + +again: + count++; + sent = __write_ring(map->active.ring, + &map->active.data, &msg->msg_iter, + len); + if (sent > 0) { + len -= sent; + tot_sent += sent; + notify_remote_via_irq(map->active.irq); + } + if (sent >= 0 && len > 0 && count < PVCALLS_FRONT_MAX_SPIN) + goto again; + if (sent < 0) + tot_sent = sent; + + mutex_unlock(&map->active.out_mutex); + pvcalls_exit(); + return tot_sent; +} + int pvcalls_front_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct pvcalls_bedata *bedata; diff --git a/drivers/xen/pvcalls-front.h b/drivers/xen/pvcalls-front.h index ab4f1da..d937c24 100644 --- a/drivers/xen/pvcalls-front.h +++ b/drivers/xen/pvcalls-front.h @@ -13,5 +13,8 @@ int pvcalls_front_bind(struct socket *sock, int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags); +int 
pvcalls_front_sendmsg(struct socket *sock, + struct msghdr *msg, + size_t len); #endif
Send data to an active socket by copying data to the "out" ring. Take the active socket out_mutex so that only one function can access the ring at any given time. If not enough room is available on the ring, rather than returning immediately or sleep-waiting, retry the write in a busy loop for up to 5000 iterations (PVCALLS_FRONT_MAX_SPIN). This small optimization turns out to improve performance significantly. Signed-off-by: Stefano Stabellini <stefano@aporeto.com> CC: boris.ostrovsky@oracle.com CC: jgross@suse.com --- drivers/xen/pvcalls-front.c | 118 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/pvcalls-front.h | 3 ++ 2 files changed, 121 insertions(+)