Message ID | 1366677147-2150-1-git-send-email-sasha.levin@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Apr 23, 2013 at 3:32 AM, Sasha Levin <sasha.levin@oracle.com> wrote: > @@ -94,4 +95,6 @@ struct kvm; > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); > + I hate the name. Maybe append_to_iovec or something? Otherwise looks OK to me. Asias? Pekka -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 23, 2013 at 12:06:56PM +0300, Pekka Enberg wrote: > On Tue, Apr 23, 2013 at 3:32 AM, Sasha Levin <sasha.levin@oracle.com> wrote: > > @@ -94,4 +95,6 @@ struct kvm; > > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); > > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > > > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); > > + > > I hate the name. Maybe append_to_iovec or something? > > Otherwise looks OK to me. Asias? I will look into it tomorrow. > Pekka
(off list) On 04/22/2013 08:32 PM, Sasha Levin wrote: > Support mergable rx buffers for virtio-net. This helps reduce the amount > of memory the guest kernel has to allocate per rx vq. One of the benefits of this is that even one virtio-net device with just one pair of vqs will require much less memory - so you could try booting smaller guests than before! Thanks, Sasha -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Do you care about guests with drivers that don't negotiate VIRTIO_NET_F_MRG_RXBUF? On Mon, Apr 22, 2013 at 5:32 PM, Sasha Levin <sasha.levin@oracle.com> wrote: > > Support mergable rx buffers for virtio-net. This helps reduce the amount > of memory the guest kernel has to allocate per rx vq. > > Signed-off-by: Sasha Levin <sasha.levin@oracle.com> > --- > tools/kvm/include/kvm/uip.h | 4 ++-- > tools/kvm/include/kvm/util.h | 3 +++ > tools/kvm/net/uip/core.c | 54 +++++--------------------------------------- > tools/kvm/net/uip/tcp.c | 2 +- > tools/kvm/net/uip/udp.c | 2 +- > tools/kvm/util/util.c | 15 ++++++++++++ > tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++-------- > 7 files changed, 55 insertions(+), 62 deletions(-) > > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h > index ac248d2..fa82f10 100644 > --- a/tools/kvm/include/kvm/uip.h > +++ b/tools/kvm/include/kvm/uip.h > @@ -252,7 +252,7 @@ struct uip_tcp_socket { > }; > > struct uip_tx_arg { > - struct virtio_net_hdr *vnet; > + struct virtio_net_hdr_mrg_rxbuf *vnet; > struct uip_info *info; > struct uip_eth *eth; > int vnet_len; > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth) > } > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info); > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info); > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info); > int uip_init(struct uip_info *info); > > int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg); > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h > index 0df9f0d..6f8ac83 100644 > --- a/tools/kvm/include/kvm/util.h > +++ b/tools/kvm/include/kvm/util.h > @@ -22,6 +22,7 @@ > #include <sys/param.h> > #include <sys/types.h> > #include <linux/types.h> > +#include <sys/uio.h> > > #ifdef __GNUC__ > #define NORETURN __attribute__((__noreturn__)) > @@ -94,4 +95,6 @@ struct kvm; > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); > + > #endif /* KVM__UTIL_H */ > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c > index 4e5bb82..d9e9993 100644 > --- a/tools/kvm/net/uip/core.c > +++ b/tools/kvm/net/uip/core.c > @@ -7,7 +7,7 @@ > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > { > - struct virtio_net_hdr *vnet; > + struct virtio_net_hdr_mrg_rxbuf *vnet; > struct uip_tx_arg arg; > int eth_len, vnet_len; > struct uip_eth *eth; > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > return vnet_len + eth_len; > } > > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info) > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info) > { > - struct virtio_net_hdr *vnet; > - struct uip_eth *eth; > struct uip_buf *buf; > - int vnet_len; > - int eth_len; > - char *p; > int len; > - int cnt; > - int i; > > /* > * Sleep until there is a buffer for guest > */ > buf = uip_buf_get_used(info); > > - /* > - * Fill device to guest buffer, vnet hdr fisrt > - */ > - vnet_len = iov[0].iov_len; > - vnet = iov[0].iov_base; > - if (buf->vnet_len > vnet_len) { > - len = -1; > - goto out; > - } > - memcpy(vnet, buf->vnet, buf->vnet_len); > - > - /* > - * Then, the real eth data > - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides > - */ > - cnt = buf->eth_len; > - p = buf->eth; > - for (i = 1; i < in; i++) { > - eth_len = iov[i].iov_len; > - eth = iov[i].iov_base; > - if (cnt > eth_len) { > - memcpy(eth, p, eth_len); > - cnt -= eth_len; > - p += eth_len; > - } else { > - memcpy(eth, p, cnt); > - cnt -= cnt; > - break; > - } > - } > - > - if (cnt) { > - pr_warning("uip_rx error"); > - len = -1; > - goto out; > - } > + memcpy(buffer, buf->vnet, buf->vnet_len); > + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len); > > len = buf->vnet_len + buf->eth_len; > > -out: > uip_buf_set_free(info, buf); > return len; > } > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info) > } > > list_for_each_entry(buf, buf_head, list) { > - buf->vnet = malloc(sizeof(struct virtio_net_hdr)); > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf)); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr)); > buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr); > > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c > index 9044f40..a99b95e 100644 > --- a/tools/kvm/net/uip/tcp.c > +++ b/tools/kvm/net/uip/tcp.c > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_ > /* > * virtio_net_hdr > */ > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > memset(buf->vnet, 0, buf->vnet_len); > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c > index 31c417c..083c221 100644 > --- a/tools/kvm/net/uip/udp.c > +++ b/tools/kvm/net/uip/udp.c > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui > /* > * virtio_net_hdr > */ > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > memset(buf->vnet, 0, buf->vnet_len); > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c > index c11a15a..4e0069c 100644 > --- a/tools/kvm/util/util.c > +++ b/tools/kvm/util/util.c > @@ -9,6 +9,7 @@ > #include <sys/mman.h> > #include <sys/stat.h> > #include <sys/statfs.h> > +#include <linux/kernel.h> > > static void report(const char *prefix, const char *err, va_list params) > { > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si > return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > } > } > + > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len) > +{ > + size_t copy, start_len = len; > + > + for (; len > 0 && iovlen > 0; ++iov, --iovlen) { > + copy = min(iov->iov_len, len); > + memcpy(iov->iov_base, kdata, copy); > + kdata += copy; > + len -= copy; > + } > + > + return start_len - len; > +} > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c > index c0a8f12..c99c5a6 100644 > --- a/tools/kvm/virtio/net.c > +++ b/tools/kvm/virtio/net.c > @@ -32,8 +32,8 @@ > struct net_dev; > > struct net_dev_operations { > - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); > - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); > + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev); > + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev); > }; > > struct net_dev { > @@ -63,6 +63,8 @@ struct net_dev { > static LIST_HEAD(ndevs); > static int compat_id = -1; > > +#define MAX_PACKET_SIZE ((u16)(-1)) > + > static void *virtio_net_rx_thread(void *p) > { > struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p) > mutex_unlock(&ndev->io_lock[id]); > > while (virt_queue__available(vq)) { > + unsigned char buffer[MAX_PACKET_SIZE]; > + struct virtio_net_hdr_mrg_rxbuf *hdr; > + > + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev); > head = virt_queue__get_iov(vq, iov, &out, &in, kvm); > - len = ndev->ops->rx(iov, in, ndev); > - virt_queue__set_used_elem(vq, head, len); > + hdr = (void *)iov[0].iov_base; > + while (len) { > + int copied; > + > + copied = memcpy_toiovecend(iov, in, buffer, len); > + len -= copied; > + hdr->num_buffers++; > + virt_queue__set_used_elem(vq, head, copied); > + if (len == 0) > + break; > + head = virt_queue__get_iov(vq, iov, &out, &in, kvm); Need to check that virt_queue__available(vq) first? > + } > > /* We should interrupt guest right now, otherwise latency is huge. */ > if (virtio_queue__should_signal(vq)) > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params, > goto fail; > } > > - hdr_len = sizeof(struct virtio_net_hdr); > + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) > pr_warning("Config tap device TUNSETVNETHDRSZ error"); > > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > return writev(ndev->tap_fd, iov, out); > } > > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > { > - return readv(ndev->tap_fd, iov, in); > + return read(ndev->tap_fd, buffer, length); > } > > static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > return uip_tx(iov, out, &ndev->info); > } > > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > { > - return uip_rx(iov, in, &ndev->info); > + return uip_rx(buffer, length, &ndev->info); > } > > static struct net_dev_operations tap_ops = { > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev) > | 1UL << VIRTIO_RING_F_EVENT_IDX > | 1UL << VIRTIO_RING_F_INDIRECT_DESC > | 1UL << VIRTIO_NET_F_CTRL_VQ > - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); > + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0) > + | 1UL << VIRTIO_NET_F_MRG_RXBUF; > } > > static void set_guest_features(struct kvm *kvm, void *dev, u32 features) > -- > 1.8.2.1 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 04/23/2013 12:35 PM, Eric Northup wrote: > Do you care about guests with drivers that don't negotiate > VIRTIO_NET_F_MRG_RXBUF? We usually try to keep backward compatibility, but in this case mergable RX buffers are about 5 years old now, so it's safe to assume they'll be running in any guest. Unless there is a specific reason to allow working without them I'd rather keep the code simple in this case. > On Mon, Apr 22, 2013 at 5:32 PM, Sasha Levin <sasha.levin@oracle.com> wrote: >> + copied = memcpy_toiovecend(iov, in, buffer, len); >> + len -= copied; >> + hdr->num_buffers++; >> + virt_queue__set_used_elem(vq, head, copied); >> + if (len == 0) >> + break; >> + head = virt_queue__get_iov(vq, iov, &out, &in, kvm); > > Need to check that virt_queue__available(vq) first? Yup. I wonder why it didn't blow up running 'ping -f' with a huge packet size. Thanks, Sasha -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote: > Support mergable rx buffers for virtio-net. This helps reduce the amount > of memory the guest kernel has to allocate per rx vq. With this, we always do an extra copy of the rx data into a linear buffer. I am thinking about whether we should keep the non MRG_RXBUF code. For tap use case, it is fine, user can use vhost. But for uip use case, we can not use vhost. > Signed-off-by: Sasha Levin <sasha.levin@oracle.com> > --- > tools/kvm/include/kvm/uip.h | 4 ++-- > tools/kvm/include/kvm/util.h | 3 +++ > tools/kvm/net/uip/core.c | 54 +++++--------------------------------------- > tools/kvm/net/uip/tcp.c | 2 +- > tools/kvm/net/uip/udp.c | 2 +- > tools/kvm/util/util.c | 15 ++++++++++++ > tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++-------- > 7 files changed, 55 insertions(+), 62 deletions(-) > > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h > index ac248d2..fa82f10 100644 > --- a/tools/kvm/include/kvm/uip.h > +++ b/tools/kvm/include/kvm/uip.h > @@ -252,7 +252,7 @@ struct uip_tcp_socket { > }; > > struct uip_tx_arg { > - struct virtio_net_hdr *vnet; > + struct virtio_net_hdr_mrg_rxbuf *vnet; > struct uip_info *info; > struct uip_eth *eth; > int vnet_len; > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth) > } > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info); > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info); > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info); > int uip_init(struct uip_info *info); > > int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg); > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h > index 0df9f0d..6f8ac83 100644 > --- a/tools/kvm/include/kvm/util.h > +++ b/tools/kvm/include/kvm/util.h > @@ -22,6 +22,7 @@ > #include <sys/param.h> > #include <sys/types.h> > #include <linux/types.h> > +#include <sys/uio.h> > > #ifdef __GNUC__ > #define NORETURN __attribute__((__noreturn__)) > @@ -94,4 +95,6 @@ struct kvm; > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); > + > #endif /* KVM__UTIL_H */ > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c > index 4e5bb82..d9e9993 100644 > --- a/tools/kvm/net/uip/core.c > +++ b/tools/kvm/net/uip/core.c > @@ -7,7 +7,7 @@ > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > { > - struct virtio_net_hdr *vnet; > + struct virtio_net_hdr_mrg_rxbuf *vnet; > struct uip_tx_arg arg; > int eth_len, vnet_len; > struct uip_eth *eth; > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > return vnet_len + eth_len; > } > > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info) > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info) > { > - struct virtio_net_hdr *vnet; > - struct uip_eth *eth; > struct uip_buf *buf; > - int vnet_len; > - int eth_len; > - char *p; > int len; > - int cnt; > - int i; > > /* > * Sleep until there is a buffer for guest > */ > buf = uip_buf_get_used(info); > > - /* > - * Fill device to guest buffer, vnet hdr fisrt > - */ > - vnet_len = iov[0].iov_len; > - vnet = iov[0].iov_base; > - if (buf->vnet_len > vnet_len) { > - len = -1; > - goto out; > - } > - memcpy(vnet, buf->vnet, buf->vnet_len); > - > - /* > - * Then, the real eth data > - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides > - */ > - cnt = buf->eth_len; > - p = buf->eth; > - for (i = 1; i < in; i++) { > - eth_len = iov[i].iov_len; > - eth = iov[i].iov_base; > - if (cnt > eth_len) { > - memcpy(eth, p, eth_len); > - cnt -= eth_len; > - p += eth_len; > - } else { > - memcpy(eth, p, cnt); > - cnt -= cnt; > - break; > - } > - } > - > - if (cnt) { > - pr_warning("uip_rx error"); > - len = -1; > - goto out; > - } > + memcpy(buffer, buf->vnet, buf->vnet_len); > + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len); > > len = buf->vnet_len + buf->eth_len; > > -out: > uip_buf_set_free(info, buf); > return len; > } > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info) > } > > list_for_each_entry(buf, buf_head, list) { > - buf->vnet = malloc(sizeof(struct virtio_net_hdr)); > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf)); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr)); > buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr); > > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c > index 9044f40..a99b95e 100644 > --- a/tools/kvm/net/uip/tcp.c > +++ b/tools/kvm/net/uip/tcp.c > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_ > /* > * virtio_net_hdr > */ > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > memset(buf->vnet, 0, buf->vnet_len); > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c > index 31c417c..083c221 100644 > --- a/tools/kvm/net/uip/udp.c > +++ b/tools/kvm/net/uip/udp.c > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui > /* > * virtio_net_hdr > */ > - buf->vnet_len = sizeof(struct virtio_net_hdr); > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > memset(buf->vnet, 0, buf->vnet_len); > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c > index c11a15a..4e0069c 100644 > --- a/tools/kvm/util/util.c > +++ b/tools/kvm/util/util.c > @@ -9,6 +9,7 @@ > #include <sys/mman.h> > #include <sys/stat.h> > #include <sys/statfs.h> > +#include <linux/kernel.h> > > static void report(const char *prefix, const char *err, va_list params) > { > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si > return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > } > } > + > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len) > +{ > + size_t copy, start_len = len; > + > + for (; len > 0 && iovlen > 0; ++iov, --iovlen) { > + copy = min(iov->iov_len, len); > + memcpy(iov->iov_base, kdata, copy); > + kdata += copy; > + len -= copy; > + } > + > + return start_len - len; > +} > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c > index c0a8f12..c99c5a6 100644 > --- a/tools/kvm/virtio/net.c > +++ b/tools/kvm/virtio/net.c > @@ -32,8 +32,8 @@ > struct net_dev; > > struct net_dev_operations { > - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); > - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); > + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev); > + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev); > }; > > struct net_dev { > @@ -63,6 +63,8 @@ struct net_dev { > static LIST_HEAD(ndevs); > static int compat_id = -1; > > +#define MAX_PACKET_SIZE ((u16)(-1)) Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP plus 14 bytes eth header. > + > static void *virtio_net_rx_thread(void *p) > { > struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p) > mutex_unlock(&ndev->io_lock[id]); > > while (virt_queue__available(vq)) { > + unsigned char buffer[MAX_PACKET_SIZE]; The buffer size should at least be: MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf) > + struct virtio_net_hdr_mrg_rxbuf *hdr; > + > + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev); > head = virt_queue__get_iov(vq, iov, &out, &in, kvm); > - len = ndev->ops->rx(iov, in, ndev); > - virt_queue__set_used_elem(vq, head, len); > + hdr = (void *)iov[0].iov_base; > + while (len) { > + int copied; > + > + copied = memcpy_toiovecend(iov, in, buffer, len); > + len -= copied; > + hdr->num_buffers++; > + virt_queue__set_used_elem(vq, head, copied); > + if (len == 0) > + break; > + head = virt_queue__get_iov(vq, iov, &out, &in, kvm); Here, need to check if we still have available buffer from guest. If not need to wait for it. > + } > > /* We should interrupt guest right now, otherwise latency is huge. */ > if (virtio_queue__should_signal(vq)) > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params, > goto fail; > } > > - hdr_len = sizeof(struct virtio_net_hdr); > + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) > pr_warning("Config tap device TUNSETVNETHDRSZ error"); > > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > return writev(ndev->tap_fd, iov, out); > } > > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > { > - return readv(ndev->tap_fd, iov, in); > + return read(ndev->tap_fd, buffer, length); > } > > static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > return uip_tx(iov, out, &ndev->info); > } > > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > { > - return uip_rx(iov, in, &ndev->info); > + return uip_rx(buffer, length, &ndev->info); > } > > static struct net_dev_operations tap_ops = { > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev) > | 1UL << VIRTIO_RING_F_EVENT_IDX > | 1UL << VIRTIO_RING_F_INDIRECT_DESC > | 1UL << VIRTIO_NET_F_CTRL_VQ > - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); > + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0) > + | 1UL << VIRTIO_NET_F_MRG_RXBUF; > } > > static void set_guest_features(struct kvm *kvm, void *dev, u32 features) > -- > 1.8.2.1 >
Hi, On 04/23/2013 12:35 PM, Eric Northup wrote: >> Do you care about guests with drivers that don't negotiate >> VIRTIO_NET_F_MRG_RXBUF? On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote: > We usually try to keep backward compatibility, but in this case > mergable RX buffers are about 5 years old now, so it's safe to > assume they'll be running in any guest. > > Unless there is a specific reason to allow working without them > I'd rather keep the code simple in this case. Are there such guests around? What's the failure scenario for them after this patch? Pekka -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote: > Hi, > > On 04/23/2013 12:35 PM, Eric Northup wrote: > >> Do you care about guests with drivers that don't negotiate > >> VIRTIO_NET_F_MRG_RXBUF? > > On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote: > > We usually try to keep backward compatibility, but in this case > > mergable RX buffers are about 5 years old now, so it's safe to > > assume they'll be running in any guest. > > > > Unless there is a specific reason to allow working without them > > I'd rather keep the code simple in this case. > > Are there such guests around? What's the failure scenario for them > after this patch? > > Pekka Warning: have not looked at the patch, just a general comment. I think it's reasonable to assume embedded guests such as PXE won't negotiate any features. And, running old guests is one of the reasons people use virtualization at all. So 5 years is not a lot. In any case, stick to the device spec please, if you want it changed please send a spec patch, don't deviate from it randomly.
On Wed, Apr 24, 2013 at 01:32:56PM +0800, Asias He wrote: > On Mon, Apr 22, 2013 at 08:32:27PM -0400, Sasha Levin wrote: > > Support mergable rx buffers for virtio-net. This helps reduce the amount > > of memory the guest kernel has to allocate per rx vq. > > With this, we always do an extra copy of the rx data into a linear buffer. > I am thinking about whether we should keep the non MRG_RXBUF code. For > tap use case, it is fine, user can use vhost. But for uip use case, we > can not use vhost. You have to keep it but it might be ok not to optimize it. > > Signed-off-by: Sasha Levin <sasha.levin@oracle.com> > > --- > > tools/kvm/include/kvm/uip.h | 4 ++-- > > tools/kvm/include/kvm/util.h | 3 +++ > > tools/kvm/net/uip/core.c | 54 +++++--------------------------------------- > > tools/kvm/net/uip/tcp.c | 2 +- > > tools/kvm/net/uip/udp.c | 2 +- > > tools/kvm/util/util.c | 15 ++++++++++++ > > tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++-------- > > 7 files changed, 55 insertions(+), 62 deletions(-) > > > > diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h > > index ac248d2..fa82f10 100644 > > --- a/tools/kvm/include/kvm/uip.h > > +++ b/tools/kvm/include/kvm/uip.h > > @@ -252,7 +252,7 @@ struct uip_tcp_socket { > > }; > > > > struct uip_tx_arg { > > - struct virtio_net_hdr *vnet; > > + struct virtio_net_hdr_mrg_rxbuf *vnet; > > struct uip_info *info; > > struct uip_eth *eth; > > int vnet_len; > > @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth) > > } > > > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info); > > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info); > > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info); > > int uip_init(struct uip_info *info); > > > > int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg); > > diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h > > index 0df9f0d..6f8ac83 100644 > > --- a/tools/kvm/include/kvm/util.h > > +++ b/tools/kvm/include/kvm/util.h > > @@ -22,6 +22,7 @@ > > #include <sys/param.h> > > #include <sys/types.h> > > #include <linux/types.h> > > +#include <sys/uio.h> > > > > #ifdef __GNUC__ > > #define NORETURN __attribute__((__noreturn__)) > > @@ -94,4 +95,6 @@ struct kvm; > > void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); > > void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); > > > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); > > + > > #endif /* KVM__UTIL_H */ > > diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c > > index 4e5bb82..d9e9993 100644 > > --- a/tools/kvm/net/uip/core.c > > +++ b/tools/kvm/net/uip/core.c > > @@ -7,7 +7,7 @@ > > > > int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > > { > > - struct virtio_net_hdr *vnet; > > + struct virtio_net_hdr_mrg_rxbuf *vnet; > > struct uip_tx_arg arg; > > int eth_len, vnet_len; > > struct uip_eth *eth; > > @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) > > return vnet_len + eth_len; > > } > > > > -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info) > > +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info) > > { > > - struct virtio_net_hdr *vnet; > > - struct uip_eth *eth; > > struct uip_buf *buf; > > - int vnet_len; > > - int eth_len; > > - char *p; > > int len; > > - int cnt; > > - int i; > > > > /* > > * Sleep until there is a buffer for guest > > */ > > buf = uip_buf_get_used(info); > > > > - /* > > - * Fill device to guest buffer, vnet hdr fisrt > > - */ > > - vnet_len = iov[0].iov_len; > > - vnet = iov[0].iov_base; > > - if (buf->vnet_len > vnet_len) { > > - len = -1; > > - goto out; > > - } > > - memcpy(vnet, buf->vnet, buf->vnet_len); > > - > > - /* > > - * Then, the real eth data > > - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides > > - */ > > - cnt = buf->eth_len; > > - p = buf->eth; > > - for (i = 1; i < in; i++) { > > - eth_len = iov[i].iov_len; > > - eth = iov[i].iov_base; > > - if (cnt > eth_len) { > > - memcpy(eth, p, eth_len); > > - cnt -= eth_len; > > - p += eth_len; > > - } else { > > - memcpy(eth, p, cnt); > > - cnt -= cnt; > > - break; > > - } > > - } > > - > > - if (cnt) { > > - pr_warning("uip_rx error"); > > - len = -1; > > - goto out; > > - } > > + memcpy(buffer, buf->vnet, buf->vnet_len); > > + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len); > > > > len = buf->vnet_len + buf->eth_len; > > > > -out: > > uip_buf_set_free(info, buf); > > return len; > > } > > @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info) > > } > > > > list_for_each_entry(buf, buf_head, list) { > > - buf->vnet = malloc(sizeof(struct virtio_net_hdr)); > > - buf->vnet_len = sizeof(struct virtio_net_hdr); > > + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf)); > > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > > buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr)); > > buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr); > > > > diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c > > index 9044f40..a99b95e 100644 > > --- a/tools/kvm/net/uip/tcp.c > > +++ b/tools/kvm/net/uip/tcp.c > > @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_ > > /* > > * virtio_net_hdr > > */ > > - buf->vnet_len = sizeof(struct virtio_net_hdr); > > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > > memset(buf->vnet, 0, buf->vnet_len); > > > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > > diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c > > index 31c417c..083c221 100644 > > --- a/tools/kvm/net/uip/udp.c > > +++ b/tools/kvm/net/uip/udp.c > > @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui > > /* > > * virtio_net_hdr > > */ > > - buf->vnet_len = sizeof(struct virtio_net_hdr); > > + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > > memset(buf->vnet, 0, buf->vnet_len); > > > > buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); > > diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c > > index c11a15a..4e0069c 100644 > > --- a/tools/kvm/util/util.c > > +++ b/tools/kvm/util/util.c > > @@ -9,6 +9,7 @@ > > #include <sys/mman.h> > > #include <sys/stat.h> > > #include <sys/statfs.h> > > +#include <linux/kernel.h> > > > > static void report(const char *prefix, const char *err, va_list params) > > { > > @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si > > return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > > } > > } > > + > > +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len) > > +{ > > + size_t copy, start_len = len; > > + > > + for (; len > 0 && iovlen > 0; ++iov, --iovlen) { > > + copy = min(iov->iov_len, len); > > + memcpy(iov->iov_base, kdata, copy); > > + kdata += copy; > > + len -= copy; > > + } > > + > > + return start_len - len; > > +} > > diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c > > index c0a8f12..c99c5a6 100644 > > --- a/tools/kvm/virtio/net.c > > +++ b/tools/kvm/virtio/net.c > > @@ -32,8 +32,8 @@ > > struct net_dev; > > > > struct net_dev_operations { > > - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); > > - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); > > + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev); > > + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev); > > }; > > > > struct net_dev { > > @@ -63,6 +63,8 @@ struct net_dev { > > static LIST_HEAD(ndevs); > > static int compat_id = -1; > > > > +#define MAX_PACKET_SIZE ((u16)(-1)) > > Why 64KB - 1? The package can be 65550 bytes which is max TCP or UDP > plus 14 bytes eth header. > > > + > > static void *virtio_net_rx_thread(void *p) > > { > > struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; > > @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p) > > mutex_unlock(&ndev->io_lock[id]); > > > > while (virt_queue__available(vq)) { > > + unsigned char buffer[MAX_PACKET_SIZE]; > > The buffer size should at least be: > > MAX_PACKET_SIZE + sizeof(struct virtio_net_hdr_mrg_rxbuf) > > > + struct virtio_net_hdr_mrg_rxbuf *hdr; > > + > > + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev); > > head = virt_queue__get_iov(vq, iov, &out, &in, kvm); > > - len = ndev->ops->rx(iov, in, ndev); > > - virt_queue__set_used_elem(vq, head, len); > > + hdr = (void *)iov[0].iov_base; > > + while (len) { > > + int copied; > > + > > + copied = memcpy_toiovecend(iov, in, buffer, len); > > + len -= copied; > > + hdr->num_buffers++; > > + virt_queue__set_used_elem(vq, head, copied); > > + if (len == 0) > > + break; > > + head = virt_queue__get_iov(vq, iov, &out, &in, kvm); > > Here, need to check if we still have available buffer from guest. If not > need to wait for it. > > > + } > > > > /* We should interrupt guest right now, otherwise latency is huge. */ > > if (virtio_queue__should_signal(vq)) > > @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params, > > goto fail; > > } > > > > - hdr_len = sizeof(struct virtio_net_hdr); > > + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); > > if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) > > pr_warning("Config tap device TUNSETVNETHDRSZ error"); > > > > @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > > return writev(ndev->tap_fd, iov, out); > > } > > > > -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > > +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > > { > > - return readv(ndev->tap_fd, iov, in); > > + return read(ndev->tap_fd, buffer, length); > > } > > > > static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > > @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) > > return uip_tx(iov, out, &ndev->info); > > } > > > > -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) > > +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) > > { > > - return uip_rx(iov, in, &ndev->info); > > + return uip_rx(buffer, length, &ndev->info); > > } > > > > static struct net_dev_operations tap_ops = { > > @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev) > > | 1UL << VIRTIO_RING_F_EVENT_IDX > > | 1UL << VIRTIO_RING_F_INDIRECT_DESC > > | 1UL << VIRTIO_NET_F_CTRL_VQ > > - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); > > + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0) > > + | 1UL << VIRTIO_NET_F_MRG_RXBUF; > > } > > > > static void set_guest_features(struct kvm *kvm, void *dev, u32 features) > > -- > > 1.8.2.1 > > > > -- > Asias -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
"Michael S. Tsirkin" <mst@redhat.com> writes: > On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote: >> Hi, >> >> On 04/23/2013 12:35 PM, Eric Northup wrote: >> >> Do you care about guests with drivers that don't negotiate >> >> VIRTIO_NET_F_MRG_RXBUF? >> >> On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote: >> > We usually try to keep backward compatibility, but in this case >> > mergable RX buffers are about 5 years old now, so it's safe to >> > assume they'll be running in any guest. >> > >> > Unless there is a specific reason to allow working without them >> > I'd rather keep the code simple in this case. >> >> Are there such guests around? What's the failure scenario for them >> after this patch? >> >> Pekka > > Warning: have not looked at the patch, just a general comment. > > I think it's reasonable to assume embedded guests such as PXE won't > negotiate any features. And, running old guests is one of the reasons > people use virtualization at all. So 5 years is not a lot. > > In any case, stick to the device spec please, if you want it changed > please send a spec patch, don't deviate from it randomly. Supporting old guests is an quality of implementation issue. It's like any ABI: if noone will notice, you can remove stuff. But the case of "I can receive GSO packets but I don't support mergeable buffers" is a trivial one: you can "support" it by pretending the guest can't handle GSO :) If you want to support non-Linux guests (eg. bootloaders), you probably want to keep support for very dumb drivers with no mergable rxbufs though. Cheers, Rusty. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 04/28/2013 08:44 PM, Rusty Russell wrote: > "Michael S. Tsirkin" <mst@redhat.com> writes: >> > On Wed, Apr 24, 2013 at 09:51:57AM +0300, Pekka Enberg wrote: >>> >> Hi, >>> >> >>> >> On 04/23/2013 12:35 PM, Eric Northup wrote: >>>>> >> >> Do you care about guests with drivers that don't negotiate >>>>> >> >> VIRTIO_NET_F_MRG_RXBUF? >>> >> >>> >> On Wed, Apr 24, 2013 at 5:51 AM, Sasha Levin <sasha.levin@oracle.com> wrote: >>>> >> > We usually try to keep backward compatibility, but in this case >>>> >> > mergable RX buffers are about 5 years old now, so it's safe to >>>> >> > assume they'll be running in any guest. >>>> >> > >>>> >> > Unless there is a specific reason to allow working without them >>>> >> > I'd rather keep the code simple in this case. >>> >> >>> >> Are there such guests around? What's the failure scenario for them >>> >> after this patch? >>> >> >>> >> Pekka >> > >> > Warning: have not looked at the patch, just a general comment. >> > >> > I think it's reasonable to assume embedded guests such as PXE won't >> > negotiate any features. And, running old guests is one of the reasons >> > people use virtualization at all. So 5 years is not a lot. >> > >> > In any case, stick to the device spec please, if you want it changed >> > please send a spec patch, don't deviate from it randomly. > Supporting old guests is an quality of implementation issue. It's like > any ABI: if noone will notice, you can remove stuff. > > But the case of "I can receive GSO packets but I don't support mergeable > buffers" is a trivial one: you can "support" it by pretending the guest > can't handle GSO :) > > If you want to support non-Linux guests (eg. bootloaders), you probably > want to keep support for very dumb drivers with no mergable rxbufs > though. Yup, I'm planning on sending a version that supports older guests soonish. Thanks, Sasha -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/tools/kvm/include/kvm/uip.h b/tools/kvm/include/kvm/uip.h index ac248d2..fa82f10 100644 --- a/tools/kvm/include/kvm/uip.h +++ b/tools/kvm/include/kvm/uip.h @@ -252,7 +252,7 @@ struct uip_tcp_socket { }; struct uip_tx_arg { - struct virtio_net_hdr *vnet; + struct virtio_net_hdr_mrg_rxbuf *vnet; struct uip_info *info; struct uip_eth *eth; int vnet_len; @@ -332,7 +332,7 @@ static inline u16 uip_eth_hdrlen(struct uip_eth *eth) } int uip_tx(struct iovec *iov, u16 out, struct uip_info *info); -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info); +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info); int uip_init(struct uip_info *info); int uip_tx_do_ipv4_udp_dhcp(struct uip_tx_arg *arg); diff --git a/tools/kvm/include/kvm/util.h b/tools/kvm/include/kvm/util.h index 0df9f0d..6f8ac83 100644 --- a/tools/kvm/include/kvm/util.h +++ b/tools/kvm/include/kvm/util.h @@ -22,6 +22,7 @@ #include <sys/param.h> #include <sys/types.h> #include <linux/types.h> +#include <sys/uio.h> #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) @@ -94,4 +95,6 @@ struct kvm; void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size); void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size); +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len); + #endif /* KVM__UTIL_H */ diff --git a/tools/kvm/net/uip/core.c b/tools/kvm/net/uip/core.c index 4e5bb82..d9e9993 100644 --- a/tools/kvm/net/uip/core.c +++ b/tools/kvm/net/uip/core.c @@ -7,7 +7,7 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) { - struct virtio_net_hdr *vnet; + struct virtio_net_hdr_mrg_rxbuf *vnet; struct uip_tx_arg arg; int eth_len, vnet_len; struct uip_eth *eth; @@ -74,63 +74,21 @@ int uip_tx(struct iovec *iov, u16 out, struct uip_info *info) return vnet_len + eth_len; } -int uip_rx(struct iovec *iov, u16 in, struct uip_info *info) +int uip_rx(unsigned char *buffer, u32 length, struct uip_info *info) { - struct virtio_net_hdr *vnet; - struct uip_eth *eth; struct uip_buf *buf; - int vnet_len; - int eth_len; - char *p; int len; - int cnt; - int i; /* * Sleep until there is a buffer for guest */ buf = uip_buf_get_used(info); - /* - * Fill device to guest buffer, vnet hdr fisrt - */ - vnet_len = iov[0].iov_len; - vnet = iov[0].iov_base; - if (buf->vnet_len > vnet_len) { - len = -1; - goto out; - } - memcpy(vnet, buf->vnet, buf->vnet_len); - - /* - * Then, the real eth data - * Note: Be sure buf->eth_len is not bigger than the buffer len that guest provides - */ - cnt = buf->eth_len; - p = buf->eth; - for (i = 1; i < in; i++) { - eth_len = iov[i].iov_len; - eth = iov[i].iov_base; - if (cnt > eth_len) { - memcpy(eth, p, eth_len); - cnt -= eth_len; - p += eth_len; - } else { - memcpy(eth, p, cnt); - cnt -= cnt; - break; - } - } - - if (cnt) { - pr_warning("uip_rx error"); - len = -1; - goto out; - } + memcpy(buffer, buf->vnet, buf->vnet_len); + memcpy(buffer + buf->vnet_len, buf->eth, buf->eth_len); len = buf->vnet_len + buf->eth_len; -out: uip_buf_set_free(info, buf); return len; } @@ -172,8 +130,8 @@ int uip_init(struct uip_info *info) } list_for_each_entry(buf, buf_head, list) { - buf->vnet = malloc(sizeof(struct virtio_net_hdr)); - buf->vnet_len = sizeof(struct virtio_net_hdr); + buf->vnet = malloc(sizeof(struct virtio_net_hdr_mrg_rxbuf)); + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); buf->eth = malloc(1024*64 + sizeof(struct uip_pseudo_hdr)); buf->eth_len = 1024*64 + sizeof(struct uip_pseudo_hdr); diff --git a/tools/kvm/net/uip/tcp.c b/tools/kvm/net/uip/tcp.c index 9044f40..a99b95e 100644 --- a/tools/kvm/net/uip/tcp.c +++ b/tools/kvm/net/uip/tcp.c @@ -153,7 +153,7 @@ static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_ /* * virtio_net_hdr */ - buf->vnet_len = sizeof(struct virtio_net_hdr); + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); memset(buf->vnet, 0, buf->vnet_len); buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); diff --git a/tools/kvm/net/uip/udp.c b/tools/kvm/net/uip/udp.c index 31c417c..083c221 100644 --- a/tools/kvm/net/uip/udp.c +++ b/tools/kvm/net/uip/udp.c @@ -142,7 +142,7 @@ int uip_udp_make_pkg(struct uip_info *info, struct uip_udp_socket *sk, struct ui /* * virtio_net_hdr */ - buf->vnet_len = sizeof(struct virtio_net_hdr); + buf->vnet_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); memset(buf->vnet, 0, buf->vnet_len); buf->eth_len = ntohs(ip2->len) + uip_eth_hdrlen(&ip2->eth); diff --git a/tools/kvm/util/util.c b/tools/kvm/util/util.c index c11a15a..4e0069c 100644 --- a/tools/kvm/util/util.c +++ b/tools/kvm/util/util.c @@ -9,6 +9,7 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/statfs.h> +#include <linux/kernel.h> static void report(const char *prefix, const char *err, va_list params) { @@ -131,3 +132,17 @@ void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 si return mmap(NULL, size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); } } + +int memcpy_toiovecend(const struct iovec *iov, int iovlen, unsigned char *kdata, size_t len) +{ + size_t copy, start_len = len; + + for (; len > 0 && iovlen > 0; ++iov, --iovlen) { + copy = min(iov->iov_len, len); + memcpy(iov->iov_base, kdata, copy); + kdata += copy; + len -= copy; + } + + return start_len - len; +} diff --git a/tools/kvm/virtio/net.c b/tools/kvm/virtio/net.c index c0a8f12..c99c5a6 100644 --- a/tools/kvm/virtio/net.c +++ b/tools/kvm/virtio/net.c @@ -32,8 +32,8 @@ struct net_dev; struct net_dev_operations { - int (*rx)(struct iovec *iov, u16 in, struct net_dev *ndev); - int (*tx)(struct iovec *iov, u16 in, struct net_dev *ndev); + int (*rx)(unsigned char *buffer, u32 length, struct net_dev *ndev); + int (*tx)(struct iovec *iov, u16 out, struct net_dev *ndev); }; struct net_dev { @@ -63,6 +63,8 @@ struct net_dev { static LIST_HEAD(ndevs); static int compat_id = -1; +#define MAX_PACKET_SIZE ((u16)(-1)) + static void *virtio_net_rx_thread(void *p) { struct iovec iov[VIRTIO_NET_QUEUE_SIZE]; @@ -90,9 +92,23 @@ static void *virtio_net_rx_thread(void *p) mutex_unlock(&ndev->io_lock[id]); while (virt_queue__available(vq)) { + unsigned char buffer[MAX_PACKET_SIZE]; + struct virtio_net_hdr_mrg_rxbuf *hdr; + + len = ndev->ops->rx(buffer, MAX_PACKET_SIZE, ndev); head = virt_queue__get_iov(vq, iov, &out, &in, kvm); - len = ndev->ops->rx(iov, in, ndev); - virt_queue__set_used_elem(vq, head, len); + hdr = (void *)iov[0].iov_base; + while (len) { + int copied; + + copied = memcpy_toiovecend(iov, in, buffer, len); + len -= copied; + hdr->num_buffers++; + virt_queue__set_used_elem(vq, head, copied); + if (len == 0) + break; + head = virt_queue__get_iov(vq, iov, &out, &in, kvm); + } /* We should interrupt guest right now, otherwise latency is huge. */ if (virtio_queue__should_signal(vq)) @@ -241,7 +257,7 @@ static bool virtio_net__tap_init(const struct virtio_net_params *params, goto fail; } - hdr_len = sizeof(struct virtio_net_hdr); + hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); if (ioctl(ndev->tap_fd, TUNSETVNETHDRSZ, &hdr_len) < 0) pr_warning("Config tap device TUNSETVNETHDRSZ error"); @@ -300,9 +316,9 @@ static inline int tap_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) return writev(ndev->tap_fd, iov, out); } -static inline int tap_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) +static inline int tap_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) { - return readv(ndev->tap_fd, iov, in); + return read(ndev->tap_fd, buffer, length); } static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) @@ -310,9 +326,9 @@ static inline int uip_ops_tx(struct iovec *iov, u16 out, struct net_dev *ndev) return uip_tx(iov, out, &ndev->info); } -static inline int uip_ops_rx(struct iovec *iov, u16 in, struct net_dev *ndev) +static inline int uip_ops_rx(unsigned char *buffer, u32 length, struct net_dev *ndev) { - return uip_rx(iov, in, &ndev->info); + return uip_rx(buffer, length, &ndev->info); } static struct net_dev_operations tap_ops = { @@ -347,7 +363,8 @@ static u32 get_host_features(struct kvm *kvm, void *dev) | 1UL << VIRTIO_RING_F_EVENT_IDX | 1UL << VIRTIO_RING_F_INDIRECT_DESC | 1UL << VIRTIO_NET_F_CTRL_VQ - | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0); + | 1UL << (ndev->queue_pairs > 1 ? VIRTIO_NET_F_MQ : 0) + | 1UL << VIRTIO_NET_F_MRG_RXBUF; } static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
Support mergable rx buffers for virtio-net. This helps reduce the amount of memory the guest kernel has to allocate per rx vq. Signed-off-by: Sasha Levin <sasha.levin@oracle.com> --- tools/kvm/include/kvm/uip.h | 4 ++-- tools/kvm/include/kvm/util.h | 3 +++ tools/kvm/net/uip/core.c | 54 +++++--------------------------------------- tools/kvm/net/uip/tcp.c | 2 +- tools/kvm/net/uip/udp.c | 2 +- tools/kvm/util/util.c | 15 ++++++++++++ tools/kvm/virtio/net.c | 37 ++++++++++++++++++++++-------- 7 files changed, 55 insertions(+), 62 deletions(-)