diff mbox

[RFC,v2,03/10] virtio-net rsc: Chain Lookup, Packet Caching and Framework of IPv4

Message ID 1454264009-24094-4-git-send-email-wexu@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wei Xu Jan. 31, 2016, 6:13 p.m. UTC
From: Wei Xu <wei@wei-thinkpad.nay.redhat.com>

Upon a packet is arriving, a corresponding chain will be selected or created,
or be bypassed if it's not an IPv4 packets.

The callback in the chain will be invoked to call the real coalescing.

Since the coalescing is based on the TCP connection, so the packets will be
cached if there is no previous data within the same connection.

The framework of IPv4 is also introduced.

This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data
coalescing)

Signed-off-by: Wei Xu <wexu@redhat.com>
---
 hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 172 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Jan. 31, 2016, 6:50 p.m. UTC | #1
On Mon, Feb 01, 2016 at 02:13:22AM +0800, wexu@redhat.com wrote:
> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com>
> 
> Upon a packet is arriving, a corresponding chain will be selected or created,
> or be bypassed if it's not an IPv4 packets.
> 
> The callback in the chain will be invoked to call the real coalescing.
> 
> Since the coalescing is based on the TCP connection, so the packets will be
> cached if there is no previous data within the same connection.
> 
> The framework of IPv4 is also introduced.
> 
> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data
> coalescing)
> 
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
>  hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 172 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 4e9458e..cfbac6d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -14,10 +14,12 @@
>  #include "qemu/iov.h"
>  #include "hw/virtio/virtio.h"
>  #include "net/net.h"
> +#include "net/eth.h"
>  #include "net/checksum.h"
>  #include "net/tap.h"
>  #include "qemu/error-report.h"
>  #include "qemu/timer.h"
> +#include "qemu/sockets.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "hw/virtio/virtio-bus.h"
> @@ -37,6 +39,21 @@
>  #define endof(container, field) \
>      (offsetof(container, field) + sizeof(((container *)0)->field))
>  
> +#define VIRTIO_HEADER   12    /* Virtio net header size */
> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
> +
> +#define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
> +
> +/* Global statistics */
> +static uint32_t rsc_chain_no_mem;
> +
> +/* Switcher to enable/disable rsc */
> +static bool virtio_net_rsc_bypass;
> +
> +/* Coalesce callback for ipv4/6 */
> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
> +                                     const uint8_t *buf, size_t size);
> +

Since there are only 2 cases, it's probably better to just
open-code if (v4) -> coalesce4 else if v6 -> coalesce6

>  typedef struct VirtIOFeature {
>      uint32_t flags;
>      size_t end;
> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>      return 0;
>  }
>  
> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
> +static ssize_t virtio_net_do_receive(NetClientState *nc,
> +                                  const uint8_t *buf, size_t size)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
>      VirtIONetQueue *q = virtio_net_get_subqueue(nc);
> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n)
>      }
>  }
>  
> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
> +                                    const uint8_t *buf, size_t size)
> +{
> +    NetRscSeg *seg;
> +
> +    seg = g_malloc(sizeof(NetRscSeg));
> +    if (!seg) {
> +        return 0;
> +    }
> +
> +    seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
> +    if (!seg->buf) {
> +        goto out;
> +    }
> +
> +    memmove(seg->buf, buf, size);
> +    seg->size = size;
> +    seg->dup_ack_count = 0;
> +    seg->is_coalesced = 0;
> +    seg->nc = nc;
> +
> +    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
> +    return size;
> +
> +out:
> +    g_free(seg);
> +    return 0;
> +}
> +
> +
> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
> +                       NetRscSeg *seg, const uint8_t *buf, size_t size)
> +{
> +    /* This real part of this function will be introduced in next patch, just
> +    *  return a 'final' to feed the compilation. */
> +    return RSC_FINAL;
> +}
> +
> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
> +                const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
> +{
> +    int ret;
> +    NetRscSeg *seg, *nseg;
> +
> +    if (QTAILQ_EMPTY(&chain->buffers)) {
> +        if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
> +            return 0;
> +        } else {
> +            return size;
> +        }
> +    }
> +
> +    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
> +        ret = coalesce(chain, seg, buf, size);
> +        if (RSC_FINAL == ret) {
> +            ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
> +            QTAILQ_REMOVE(&chain->buffers, seg, next);
> +            g_free(seg->buf);
> +            g_free(seg);
> +            if (ret == 0) {
> +                /* Send failed */
> +                return 0;
> +            }
> +
> +            /* Send current packet */
> +            return virtio_net_do_receive(nc, buf, size);
> +        } else if (RSC_NO_MATCH == ret) {
> +            continue;
> +        } else {
> +            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
> +            seg->is_coalesced = 1;
> +            return size;
> +        }
> +    }
> +
> +    return virtio_net_rsc_cache_buf(chain, nc, buf, size);
> +}
> +
> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
> +                                      const uint8_t *buf, size_t size)
> +{
> +    NetRscChain *chain;
> +
> +    chain = (NetRscChain *)opq;
> +    return virtio_net_rsc_callback(chain, nc, buf, size,
> +                                   virtio_net_rsc_try_coalesce4);
> +}
> +
> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
> +                                                uint16_t proto)
> +{
> +    VirtIONet *n;
> +    NetRscChain *chain;
> +    NICState *nic;
> +
> +    /* Only handle IPv4/6 */
> +    if (proto != (uint16_t)ETH_P_IP) {
> +        return NULL;
> +    }
> +
> +    nic = (NICState *)nc;
> +    n = container_of(&nic, VirtIONet, nic);
> +    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
> +        if (chain->proto == proto) {
> +            return chain;
> +        }
> +    }
> +
> +    chain = g_malloc(sizeof(*chain));
> +    if (!chain) {
> +        rsc_chain_no_mem++;
> +        return NULL;
> +    }
> +
> +    chain->proto = proto;
> +    chain->do_receive = virtio_net_rsc_receive4;
> +
> +    QTAILQ_INIT(&chain->buffers);
> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
> +    return chain;
> +}
> +
> +static ssize_t virtio_net_rsc_receive(NetClientState *nc,
> +                                      const uint8_t *buf, size_t size)
> +{
> +    uint16_t proto;
> +    NetRscChain *chain;
> +    struct eth_header *eth;
> +
> +    if (size < IP_OFFSET) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    }
> +
> +    eth = (struct eth_header *)(buf + VIRTIO_HEADER);
> +    proto = htons(eth->h_proto);
> +    chain = virtio_net_rsc_lookup_chain(nc, proto);
> +    if (!chain) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    } else {
> +        return chain->do_receive(chain, nc, buf, size);
> +    }
> +}
> +
> +static ssize_t virtio_net_receive(NetClientState *nc,
> +                                  const uint8_t *buf, size_t size)
> +{
> +    if (virtio_net_rsc_bypass) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    } else {
> +        return virtio_net_rsc_receive(nc, buf, size);
> +    }
> +}
> +
>  static NetClientInfo net_virtio_info = {
>      .type = NET_CLIENT_OPTIONS_KIND_NIC,
>      .size = sizeof(NICState),
> -- 
> 2.4.0
Wei Xu Feb. 1, 2016, 3:40 a.m. UTC | #2
On 02/01/2016 02:50 AM, Michael S. Tsirkin wrote:
> On Mon, Feb 01, 2016 at 02:13:22AM +0800, wexu@redhat.com wrote:
>> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com>
>>
>> Upon a packet is arriving, a corresponding chain will be selected or created,
>> or be bypassed if it's not an IPv4 packets.
>>
>> The callback in the chain will be invoked to call the real coalescing.
>>
>> Since the coalescing is based on the TCP connection, so the packets will be
>> cached if there is no previous data within the same connection.
>>
>> The framework of IPv4 is also introduced.
>>
>> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data
>> coalescing)
>>
>> Signed-off-by: Wei Xu <wexu@redhat.com>
>> ---
>>   hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 172 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 4e9458e..cfbac6d 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -14,10 +14,12 @@
>>   #include "qemu/iov.h"
>>   #include "hw/virtio/virtio.h"
>>   #include "net/net.h"
>> +#include "net/eth.h"
>>   #include "net/checksum.h"
>>   #include "net/tap.h"
>>   #include "qemu/error-report.h"
>>   #include "qemu/timer.h"
>> +#include "qemu/sockets.h"
>>   #include "hw/virtio/virtio-net.h"
>>   #include "net/vhost_net.h"
>>   #include "hw/virtio/virtio-bus.h"
>> @@ -37,6 +39,21 @@
>>   #define endof(container, field) \
>>       (offsetof(container, field) + sizeof(((container *)0)->field))
>>   
>> +#define VIRTIO_HEADER   12    /* Virtio net header size */
>> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
>> +
>> +#define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
>> +
>> +/* Global statistics */
>> +static uint32_t rsc_chain_no_mem;
>> +
>> +/* Switcher to enable/disable rsc */
>> +static bool virtio_net_rsc_bypass;
>> +
>> +/* Coalesce callback for ipv4/6 */
>> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
>> +                                     const uint8_t *buf, size_t size);
>> +
> Since there are only 2 cases, it's probably better to just
> open-code if (v4) -> coalesce4 else if v6 -> coalesce6
OK, thanks mst.

Wei
>
>>   typedef struct VirtIOFeature {
>>       uint32_t flags;
>>       size_t end;
>> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>>       return 0;
>>   }
>>   
>> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
>> +static ssize_t virtio_net_do_receive(NetClientState *nc,
>> +                                  const uint8_t *buf, size_t size)
>>   {
>>       VirtIONet *n = qemu_get_nic_opaque(nc);
>>       VirtIONetQueue *q = virtio_net_get_subqueue(nc);
>> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n)
>>       }
>>   }
>>   
>> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>> +                                    const uint8_t *buf, size_t size)
>> +{
>> +    NetRscSeg *seg;
>> +
>> +    seg = g_malloc(sizeof(NetRscSeg));
>> +    if (!seg) {
>> +        return 0;
>> +    }
>> +
>> +    seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
>> +    if (!seg->buf) {
>> +        goto out;
>> +    }
>> +
>> +    memmove(seg->buf, buf, size);
>> +    seg->size = size;
>> +    seg->dup_ack_count = 0;
>> +    seg->is_coalesced = 0;
>> +    seg->nc = nc;
>> +
>> +    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>> +    return size;
>> +
>> +out:
>> +    g_free(seg);
>> +    return 0;
>> +}
>> +
>> +
>> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
>> +                       NetRscSeg *seg, const uint8_t *buf, size_t size)
>> +{
>> +    /* This real part of this function will be introduced in next patch, just
>> +    *  return a 'final' to feed the compilation. */
>> +    return RSC_FINAL;
>> +}
>> +
>> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
>> +                const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
>> +{
>> +    int ret;
>> +    NetRscSeg *seg, *nseg;
>> +
>> +    if (QTAILQ_EMPTY(&chain->buffers)) {
>> +        if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
>> +            return 0;
>> +        } else {
>> +            return size;
>> +        }
>> +    }
>> +
>> +    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
>> +        ret = coalesce(chain, seg, buf, size);
>> +        if (RSC_FINAL == ret) {
>> +            ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>> +            QTAILQ_REMOVE(&chain->buffers, seg, next);
>> +            g_free(seg->buf);
>> +            g_free(seg);
>> +            if (ret == 0) {
>> +                /* Send failed */
>> +                return 0;
>> +            }
>> +
>> +            /* Send current packet */
>> +            return virtio_net_do_receive(nc, buf, size);
>> +        } else if (RSC_NO_MATCH == ret) {
>> +            continue;
>> +        } else {
>> +            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
>> +            seg->is_coalesced = 1;
>> +            return size;
>> +        }
>> +    }
>> +
>> +    return virtio_net_rsc_cache_buf(chain, nc, buf, size);
>> +}
>> +
>> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
>> +                                      const uint8_t *buf, size_t size)
>> +{
>> +    NetRscChain *chain;
>> +
>> +    chain = (NetRscChain *)opq;
>> +    return virtio_net_rsc_callback(chain, nc, buf, size,
>> +                                   virtio_net_rsc_try_coalesce4);
>> +}
>> +
>> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
>> +                                                uint16_t proto)
>> +{
>> +    VirtIONet *n;
>> +    NetRscChain *chain;
>> +    NICState *nic;
>> +
>> +    /* Only handle IPv4/6 */
>> +    if (proto != (uint16_t)ETH_P_IP) {
>> +        return NULL;
>> +    }
>> +
>> +    nic = (NICState *)nc;
>> +    n = container_of(&nic, VirtIONet, nic);
>> +    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
>> +        if (chain->proto == proto) {
>> +            return chain;
>> +        }
>> +    }
>> +
>> +    chain = g_malloc(sizeof(*chain));
>> +    if (!chain) {
>> +        rsc_chain_no_mem++;
>> +        return NULL;
>> +    }
>> +
>> +    chain->proto = proto;
>> +    chain->do_receive = virtio_net_rsc_receive4;
>> +
>> +    QTAILQ_INIT(&chain->buffers);
>> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
>> +    return chain;
>> +}
>> +
>> +static ssize_t virtio_net_rsc_receive(NetClientState *nc,
>> +                                      const uint8_t *buf, size_t size)
>> +{
>> +    uint16_t proto;
>> +    NetRscChain *chain;
>> +    struct eth_header *eth;
>> +
>> +    if (size < IP_OFFSET) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    }
>> +
>> +    eth = (struct eth_header *)(buf + VIRTIO_HEADER);
>> +    proto = htons(eth->h_proto);
>> +    chain = virtio_net_rsc_lookup_chain(nc, proto);
>> +    if (!chain) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    } else {
>> +        return chain->do_receive(chain, nc, buf, size);
>> +    }
>> +}
>> +
>> +static ssize_t virtio_net_receive(NetClientState *nc,
>> +                                  const uint8_t *buf, size_t size)
>> +{
>> +    if (virtio_net_rsc_bypass) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    } else {
>> +        return virtio_net_rsc_receive(nc, buf, size);
>> +    }
>> +}
>> +
>>   static NetClientInfo net_virtio_info = {
>>       .type = NET_CLIENT_OPTIONS_KIND_NIC,
>>       .size = sizeof(NICState),
>> -- 
>> 2.4.0
Jason Wang Feb. 1, 2016, 5:55 a.m. UTC | #3
On 02/01/2016 02:13 AM, wexu@redhat.com wrote:
> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com>
>
> Upon a packet is arriving, a corresponding chain will be selected or created,
> or be bypassed if it's not an IPv4 packets.
>
> The callback in the chain will be invoked to call the real coalescing.
>
> Since the coalescing is based on the TCP connection, so the packets will be
> cached if there is no previous data within the same connection.
>
> The framework of IPv4 is also introduced.
>
> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data
> coalescing)

Then looks like the order needs to be changed?

>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
>  hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 172 insertions(+), 1 deletion(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 4e9458e..cfbac6d 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -14,10 +14,12 @@
>  #include "qemu/iov.h"
>  #include "hw/virtio/virtio.h"
>  #include "net/net.h"
> +#include "net/eth.h"
>  #include "net/checksum.h"
>  #include "net/tap.h"
>  #include "qemu/error-report.h"
>  #include "qemu/timer.h"
> +#include "qemu/sockets.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "hw/virtio/virtio-bus.h"
> @@ -37,6 +39,21 @@
>  #define endof(container, field) \
>      (offsetof(container, field) + sizeof(((container *)0)->field))
>  
> +#define VIRTIO_HEADER   12    /* Virtio net header size */

This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off.

> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
> +
> +#define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
> +
> +/* Global statistics */
> +static uint32_t rsc_chain_no_mem;

This is meaningless, see below comments.

> +
> +/* Switcher to enable/disable rsc */
> +static bool virtio_net_rsc_bypass;
> +
> +/* Coalesce callback for ipv4/6 */
> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
> +                                     const uint8_t *buf, size_t size);
> +
>  typedef struct VirtIOFeature {
>      uint32_t flags;
>      size_t end;
> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>      return 0;
>  }
>  
> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
> +static ssize_t virtio_net_do_receive(NetClientState *nc,
> +                                  const uint8_t *buf, size_t size)
>  {
>      VirtIONet *n = qemu_get_nic_opaque(nc);
>      VirtIONetQueue *q = virtio_net_get_subqueue(nc);
> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n)
>      }
>  }
>  
> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
> +                                    const uint8_t *buf, size_t size)
> +{
> +    NetRscSeg *seg;
> +
> +    seg = g_malloc(sizeof(NetRscSeg));
> +    if (!seg) {
> +        return 0;
> +    }

g_malloc() can't fail, no need to check if it succeeded.

> +
> +    seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
> +    if (!seg->buf) {
> +        goto out;
> +    }
> +
> +    memmove(seg->buf, buf, size);
> +    seg->size = size;
> +    seg->dup_ack_count = 0;
> +    seg->is_coalesced = 0;
> +    seg->nc = nc;
> +
> +    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
> +    return size;
> +
> +out:
> +    g_free(seg);
> +    return 0;
> +}
> +
> +
> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
> +                       NetRscSeg *seg, const uint8_t *buf, size_t size)
> +{
> +    /* This real part of this function will be introduced in next patch, just
> +    *  return a 'final' to feed the compilation. */
> +    return RSC_FINAL;
> +}
> +
> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
> +                const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
> +{

Looks like this function was called directly, so "callback" suffix is
not accurate.

> +    int ret;
> +    NetRscSeg *seg, *nseg;
> +
> +    if (QTAILQ_EMPTY(&chain->buffers)) {
> +        if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
> +            return 0;
> +        } else {
> +            return size;
> +        }
> +    }
> +
> +    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
> +        ret = coalesce(chain, seg, buf, size);
> +        if (RSC_FINAL == ret) {

Let's use "ret == RSC_FINAL" for a consistent coding style with other
qemu codes.

> +            ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
> +            QTAILQ_REMOVE(&chain->buffers, seg, next);
> +            g_free(seg->buf);
> +            g_free(seg);
> +            if (ret == 0) {
> +                /* Send failed */
> +                return 0;
> +            }
> +
> +            /* Send current packet */
> +            return virtio_net_do_receive(nc, buf, size);
> +        } else if (RSC_NO_MATCH == ret) {
> +            continue;
> +        } else {
> +            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
> +            seg->is_coalesced = 1;
> +            return size;
> +        }
> +    }
> +
> +    return virtio_net_rsc_cache_buf(chain, nc, buf, size);

Maybe you can move the seg traversing info coalesce() function? This can
greatly simplify the codes above? (E.g no need to call
virtio_net_rsc_cache_buf() in two places?)

> +}
> +
> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
> +                                      const uint8_t *buf, size_t size)
> +{
> +    NetRscChain *chain;
> +
> +    chain = (NetRscChain *)opq;
> +    return virtio_net_rsc_callback(chain, nc, buf, size,
> +                                   virtio_net_rsc_try_coalesce4);
> +}
> +
> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
> +                                                uint16_t proto)
> +{
> +    VirtIONet *n;
> +    NetRscChain *chain;
> +    NICState *nic;
> +
> +    /* Only handle IPv4/6 */
> +    if (proto != (uint16_t)ETH_P_IP) {

The comments is inconsistent with code, the code can only handle IPv4
currently.

> +        return NULL;
> +    }
> +
> +    nic = (NICState *)nc;

This cast is wrong for multiqueue backend. You can refer the exist
virtio_net_receive() for how to vet VirtIONet structure. E.g:

...
    VirtIONet *n = qemu_get_nic_opaque(nc);
...

> +    n = container_of(&nic, VirtIONet, nic);
> +    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
> +        if (chain->proto == proto) {
> +            return chain;
> +        }
> +    }
> +
> +    chain = g_malloc(sizeof(*chain));
> +    if (!chain) {
> +        rsc_chain_no_mem++;

Since g_malloc() can't fail, this is meaningless.

> +        return NULL;
> +    }
> +
> +    chain->proto = proto;
> +    chain->do_receive = virtio_net_rsc_receive4;
> +
> +    QTAILQ_INIT(&chain->buffers);
> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
> +    return chain;
> +}

Better to split the chain initialization from lookup. And we can
initialize ipv4 chain during initialization.

> +
> +static ssize_t virtio_net_rsc_receive(NetClientState *nc,
> +                                      const uint8_t *buf, size_t size)
> +{
> +    uint16_t proto;
> +    NetRscChain *chain;
> +    struct eth_header *eth;
> +
> +    if (size < IP_OFFSET) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    }
> +
> +    eth = (struct eth_header *)(buf + VIRTIO_HEADER);
> +    proto = htons(eth->h_proto);
> +    chain = virtio_net_rsc_lookup_chain(nc, proto);
> +    if (!chain) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    } else {
> +        return chain->do_receive(chain, nc, buf, size);
> +    }
> +}
> +
> +static ssize_t virtio_net_receive(NetClientState *nc,
> +                                  const uint8_t *buf, size_t size)
> +{
> +    if (virtio_net_rsc_bypass) {
> +        return virtio_net_do_receive(nc, buf, size);
> +    } else {
> +        return virtio_net_rsc_receive(nc, buf, size);
> +    }
> +}
> +
>  static NetClientInfo net_virtio_info = {
>      .type = NET_CLIENT_OPTIONS_KIND_NIC,
>      .size = sizeof(NICState),
Wei Xu Feb. 1, 2016, 8:02 a.m. UTC | #4
On 02/01/2016 01:55 PM, Jason Wang wrote:

>
> On 02/01/2016 02:13 AM, wexu@redhat.com wrote:
>> From: Wei Xu <wei@wei-thinkpad.nay.redhat.com>
>>
>> Upon a packet is arriving, a corresponding chain will be selected or created,
>> or be bypassed if it's not an IPv4 packets.
>>
>> The callback in the chain will be invoked to call the real coalescing.
>>
>> Since the coalescing is based on the TCP connection, so the packets will be
>> cached if there is no previous data within the same connection.
>>
>> The framework of IPv4 is also introduced.
>>
>> This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data
>> coalescing)
> Then looks like the order needs to be changed?

OK, as mentioned in other feedbacks, some of the patches should be merged, will adjust the patch set again, thanks.


>> Signed-off-by: Wei Xu <wexu@redhat.com>
>> ---
>>   hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 172 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 4e9458e..cfbac6d 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -14,10 +14,12 @@
>>   #include "qemu/iov.h"
>>   #include "hw/virtio/virtio.h"
>>   #include "net/net.h"
>> +#include "net/eth.h"
>>   #include "net/checksum.h"
>>   #include "net/tap.h"
>>   #include "qemu/error-report.h"
>>   #include "qemu/timer.h"
>> +#include "qemu/sockets.h"
>>   #include "hw/virtio/virtio-net.h"
>>   #include "net/vhost_net.h"
>>   #include "hw/virtio/virtio-bus.h"
>> @@ -37,6 +39,21 @@
>>   #define endof(container, field) \
>>       (offsetof(container, field) + sizeof(((container *)0)->field))
>>   
>> +#define VIRTIO_HEADER   12    /* Virtio net header size */
> This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off

OK.

>
>> +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
>> +
>> +#define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
>> +
>> +/* Global statistics */
>> +static uint32_t rsc_chain_no_mem;
> This is meaningless, see below comments.

Yes, should remove this.

>
>> +
>> +/* Switcher to enable/disable rsc */
>> +static bool virtio_net_rsc_bypass;
>> +
>> +/* Coalesce callback for ipv4/6 */
>> +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
>> +                                     const uint8_t *buf, size_t size);
>> +
>>   typedef struct VirtIOFeature {
>>       uint32_t flags;
>>       size_t end;
>> @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
>>       return 0;
>>   }
>>   
>> -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
>> +static ssize_t virtio_net_do_receive(NetClientState *nc,
>> +                                  const uint8_t *buf, size_t size)
>>   {
>>       VirtIONet *n = qemu_get_nic_opaque(nc);
>>       VirtIONetQueue *q = virtio_net_get_subqueue(nc);
>> @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n)
>>       }
>>   }
>>   
>> +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>> +                                    const uint8_t *buf, size_t size)
>> +{
>> +    NetRscSeg *seg;
>> +
>> +    seg = g_malloc(sizeof(NetRscSeg));
>> +    if (!seg) {
>> +        return 0;
>> +    }
> g_malloc() can't fail, no need to check if it succeeded.

OK.

>
>> +
>> +    seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
>> +    if (!seg->buf) {
>> +        goto out;
>> +    }
>> +
>> +    memmove(seg->buf, buf, size);
>> +    seg->size = size;
>> +    seg->dup_ack_count = 0;
>> +    seg->is_coalesced = 0;
>> +    seg->nc = nc;
>> +
>> +    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>> +    return size;
>> +
>> +out:
>> +    g_free(seg);
>> +    return 0;
>> +}
>> +
>> +
>> +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
>> +                       NetRscSeg *seg, const uint8_t *buf, size_t size)
>> +{
>> +    /* This real part of this function will be introduced in next patch, just
>> +    *  return a 'final' to feed the compilation. */
>> +    return RSC_FINAL;
>> +}
>> +
>> +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
>> +                const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
>> +{
> Looks like this function was called directly, so "callback" suffix is
> not accurate.

OK.

>
>> +    int ret;
>> +    NetRscSeg *seg, *nseg;
>> +
>> +    if (QTAILQ_EMPTY(&chain->buffers)) {
>> +        if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
>> +            return 0;
>> +        } else {
>> +            return size;
>> +        }
>> +    }
>> +
>> +    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
>> +        ret = coalesce(chain, seg, buf, size);
>> +        if (RSC_FINAL == ret) {
> Let's use "ret == RSC_FINAL" for a consistent coding style with other
> qemu codes.

OK.

>
>> +            ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>> +            QTAILQ_REMOVE(&chain->buffers, seg, next);
>> +            g_free(seg->buf);
>> +            g_free(seg);
>> +            if (ret == 0) {
>> +                /* Send failed */
>> +                return 0;
>> +            }
>> +
>> +            /* Send current packet */
>> +            return virtio_net_do_receive(nc, buf, size);
>> +        } else if (RSC_NO_MATCH == ret) {
>> +            continue;
>> +        } else {
>> +            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
>> +            seg->is_coalesced = 1;
>> +            return size;
>> +        }
>> +    }
>> +
>> +    return virtio_net_rsc_cache_buf(chain, nc, buf, size);
> Maybe you can move the seg traversing info coalesce() function? This can
> greatly simplify the codes above? (E.g no need to call
> virtio_net_rsc_cache_buf() in two places?)

Good idea.

>
>> +}
>> +
>> +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
>> +                                      const uint8_t *buf, size_t size)
>> +{
>> +    NetRscChain *chain;
>> +
>> +    chain = (NetRscChain *)opq;
>> +    return virtio_net_rsc_callback(chain, nc, buf, size,
>> +                                   virtio_net_rsc_try_coalesce4);
>> +}
>> +
>> +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
>> +                                                uint16_t proto)
>> +{
>> +    VirtIONet *n;
>> +    NetRscChain *chain;
>> +    NICState *nic;
>> +
>> +    /* Only handle IPv4/6 */
>> +    if (proto != (uint16_t)ETH_P_IP) {
> The comments is inconsistent with code, the code can only handle IPv4
> currently.

OK.

>
>> +        return NULL;
>> +    }
>> +
>> +    nic = (NICState *)nc;
> This cast is wrong for multiqueue backend. You can refer the exist
> virtio_net_receive() for how to vet VirtIONet structure. E.g:
>
> ...
>      VirtIONet *n = qemu_get_nic_opaque(nc);

OK, will check it out.

> ...
>
>> +    n = container_of(&nic, VirtIONet, nic);
>> +    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
>> +        if (chain->proto == proto) {
>> +            return chain;
>> +        }
>> +    }
>> +
>> +    chain = g_malloc(sizeof(*chain));
>> +    if (!chain) {
>> +        rsc_chain_no_mem++;
> Since g_malloc() can't fail, this is meaningless.

OK.

>
>> +        return NULL;
>> +    }
>> +
>> +    chain->proto = proto;
>> +    chain->do_receive = virtio_net_rsc_receive4;
>> +
>> +    QTAILQ_INIT(&chain->buffers);
>> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
>> +    return chain;
>> +}
> Better to split the chain initialization from lookup. And we can
> initialize ipv4 chain during initialization.

Since the allocation happens really seldom, is it ok to keep the mechanism to make the logic clean?

>
>> +
>> +static ssize_t virtio_net_rsc_receive(NetClientState *nc,
>> +                                      const uint8_t *buf, size_t size)
>> +{
>> +    uint16_t proto;
>> +    NetRscChain *chain;
>> +    struct eth_header *eth;
>> +
>> +    if (size < IP_OFFSET) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    }
>> +
>> +    eth = (struct eth_header *)(buf + VIRTIO_HEADER);
>> +    proto = htons(eth->h_proto);
>> +    chain = virtio_net_rsc_lookup_chain(nc, proto);
>> +    if (!chain) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    } else {
>> +        return chain->do_receive(chain, nc, buf, size);
>> +    }
>> +}
>> +
>> +static ssize_t virtio_net_receive(NetClientState *nc,
>> +                                  const uint8_t *buf, size_t size)
>> +{
>> +    if (virtio_net_rsc_bypass) {
>> +        return virtio_net_do_receive(nc, buf, size);
>> +    } else {
>> +        return virtio_net_rsc_receive(nc, buf, size);
>> +    }
>> +}
>> +
>>   static NetClientInfo net_virtio_info = {
>>       .type = NET_CLIENT_OPTIONS_KIND_NIC,
>>       .size = sizeof(NICState),
>
>
Jason Wang Feb. 1, 2016, 9:22 a.m. UTC | #5
On 02/01/2016 04:02 PM, Wei Xu wrote:

[...]
>>
>>> +        return NULL;
>>> +    }
>>> +
>>> +    chain->proto = proto;
>>> +    chain->do_receive = virtio_net_rsc_receive4;
>>> +
>>> +    QTAILQ_INIT(&chain->buffers);
>>> +    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
>>> +    return chain;
>>> +}
>> Better to split the chain initialization from lookup. And we can
>> initialize ipv4 chain during initialization.
>
> Since the allocation happens really seldom, is it ok to keep the
> mechanism to make the logic clean? 

Ok for now.
diff mbox

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 4e9458e..cfbac6d 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -14,10 +14,12 @@ 
 #include "qemu/iov.h"
 #include "hw/virtio/virtio.h"
 #include "net/net.h"
+#include "net/eth.h"
 #include "net/checksum.h"
 #include "net/tap.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
+#include "qemu/sockets.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "hw/virtio/virtio-bus.h"
@@ -37,6 +39,21 @@ 
 #define endof(container, field) \
     (offsetof(container, field) + sizeof(((container *)0)->field))
 
+#define VIRTIO_HEADER   12    /* Virtio net header size */
+#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+
+#define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
+
+/* Global statistics */
+static uint32_t rsc_chain_no_mem;
+
+/* Switcher to enable/disable rsc */
+static bool virtio_net_rsc_bypass;
+
+/* Coalesce callback for ipv4/6 */
+typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
+                                     const uint8_t *buf, size_t size);
+
 typedef struct VirtIOFeature {
     uint32_t flags;
     size_t end;
@@ -1019,7 +1036,8 @@  static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
     return 0;
 }
 
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t virtio_net_do_receive(NetClientState *nc,
+                                  const uint8_t *buf, size_t size)
 {
     VirtIONet *n = qemu_get_nic_opaque(nc);
     VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1623,6 +1641,159 @@  static void virtio_net_rsc_cleanup(VirtIONet *n)
     }
 }
 
+static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
+                                    const uint8_t *buf, size_t size)
+{
+    NetRscSeg *seg;
+
+    seg = g_malloc(sizeof(NetRscSeg));
+    if (!seg) {
+        return 0;
+    }
+
+    seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
+    if (!seg->buf) {
+        goto out;
+    }
+
+    memmove(seg->buf, buf, size);
+    seg->size = size;
+    seg->dup_ack_count = 0;
+    seg->is_coalesced = 0;
+    seg->nc = nc;
+
+    QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
+    return size;
+
+out:
+    g_free(seg);
+    return 0;
+}
+
+
+static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
+                       NetRscSeg *seg, const uint8_t *buf, size_t size)
+{
+    /* This real part of this function will be introduced in next patch, just
+    *  return a 'final' to feed the compilation. */
+    return RSC_FINAL;
+}
+
+static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
+                const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
+{
+    int ret;
+    NetRscSeg *seg, *nseg;
+
+    if (QTAILQ_EMPTY(&chain->buffers)) {
+        if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
+            return 0;
+        } else {
+            return size;
+        }
+    }
+
+    QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
+        ret = coalesce(chain, seg, buf, size);
+        if (RSC_FINAL == ret) {
+            ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+            QTAILQ_REMOVE(&chain->buffers, seg, next);
+            g_free(seg->buf);
+            g_free(seg);
+            if (ret == 0) {
+                /* Send failed */
+                return 0;
+            }
+
+            /* Send current packet */
+            return virtio_net_do_receive(nc, buf, size);
+        } else if (RSC_NO_MATCH == ret) {
+            continue;
+        } else {
+            /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
+            seg->is_coalesced = 1;
+            return size;
+        }
+    }
+
+    return virtio_net_rsc_cache_buf(chain, nc, buf, size);
+}
+
+static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
+                                      const uint8_t *buf, size_t size)
+{
+    NetRscChain *chain;
+
+    chain = (NetRscChain *)opq;
+    return virtio_net_rsc_callback(chain, nc, buf, size,
+                                   virtio_net_rsc_try_coalesce4);
+}
+
+static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
+                                                uint16_t proto)
+{
+    VirtIONet *n;
+    NetRscChain *chain;
+    NICState *nic;
+
+    /* Only handle IPv4/6 */
+    if (proto != (uint16_t)ETH_P_IP) {
+        return NULL;
+    }
+
+    nic = (NICState *)nc;
+    n = container_of(&nic, VirtIONet, nic);
+    QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
+        if (chain->proto == proto) {
+            return chain;
+        }
+    }
+
+    chain = g_malloc(sizeof(*chain));
+    if (!chain) {
+        rsc_chain_no_mem++;
+        return NULL;
+    }
+
+    chain->proto = proto;
+    chain->do_receive = virtio_net_rsc_receive4;
+
+    QTAILQ_INIT(&chain->buffers);
+    QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
+    return chain;
+}
+
+static ssize_t virtio_net_rsc_receive(NetClientState *nc,
+                                      const uint8_t *buf, size_t size)
+{
+    uint16_t proto;
+    NetRscChain *chain;
+    struct eth_header *eth;
+
+    if (size < IP_OFFSET) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    eth = (struct eth_header *)(buf + VIRTIO_HEADER);
+    proto = htons(eth->h_proto);
+    chain = virtio_net_rsc_lookup_chain(nc, proto);
+    if (!chain) {
+        return virtio_net_do_receive(nc, buf, size);
+    } else {
+        return chain->do_receive(chain, nc, buf, size);
+    }
+}
+
+static ssize_t virtio_net_receive(NetClientState *nc,
+                                  const uint8_t *buf, size_t size)
+{
+    if (virtio_net_rsc_bypass) {
+        return virtio_net_do_receive(nc, buf, size);
+    } else {
+        return virtio_net_rsc_receive(nc, buf, size);
+    }
+}
+
 static NetClientInfo net_virtio_info = {
     .type = NET_CLIENT_OPTIONS_KIND_NIC,
     .size = sizeof(NICState),