diff mbox

[v10,26/31] COLO proxy: implement setup/teardown of COLO proxy module

Message ID 1456109555-28299-27-git-send-email-wency@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wen Congyang Feb. 22, 2016, 2:52 a.m. UTC
setup/teardown of COLO proxy module.
we use netlink to communicate with proxy module.
About colo-proxy module:
https://lkml.org/lkml/2015/6/18/32
How to use:
http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping

Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
---
 tools/libxl/Makefile           |   1 +
 tools/libxl/libxl_colo.h       |   2 +
 tools/libxl/libxl_colo_proxy.c | 230 +++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h   |  15 +++
 4 files changed, 248 insertions(+)
 create mode 100644 tools/libxl/libxl_colo_proxy.c

Comments

Wei Liu March 2, 2016, 3:04 p.m. UTC | #1
On Mon, Feb 22, 2016 at 10:52:30AM +0800, Wen Congyang wrote:
> setup/teardown of COLO proxy module.
> we use netlink to communicate with proxy module.
> About colo-proxy module:
> https://lkml.org/lkml/2015/6/18/32
> How to use:
> http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping
> 
> Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>

I'm tempted to just ack this patch as well.

Wei.
Konrad Rzeszutek Wilk March 11, 2016, 10:25 p.m. UTC | #2
> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
> +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
>  #endif
> diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
> new file mode 100644
> index 0000000..e07e640
> --- /dev/null
> +++ b/tools/libxl/libxl_colo_proxy.c
> @@ -0,0 +1,230 @@
> +/*
> + * Copyright (C) 2015 FUJITSU LIMITED

2016?
> + * Author: Yang Hongyang <hongyang.yang@easystack.cn>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU Lesser General Public License as published
> + * by the Free Software Foundation; version 2.1 only. with the special
> + * exception on linking described in file LICENSE.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU Lesser General Public License for more details.
> + */
> +
> +#include "libxl_osdeps.h" /* must come before any other headers */
> +
> +#include "libxl_internal.h"
> +#include "libxl_colo.h"
> +#include <linux/netlink.h>
> +
> +#define NETLINK_COLO 28

Can you include a comment why 28? Why not 31415?

> +
> +enum colo_netlink_op {
> +    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
> +    COLO_CHECKPOINT,
> +    COLO_FAILOVER,
> +    COLO_PROXY_INIT,
> +    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
> +};
> +
> +/* ========= colo-proxy: helper functions ========== */
> +
> +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
> +                           uint64_t size, int type)
> +{
> +    struct sockaddr_nl sa;
> +    struct nlmsghdr msg;
> +    struct iovec iov;
> +    struct msghdr mh;
> +    int ret;
> +
> +    STATE_AO_GC(cps->ao);
> +
> +    memset(&sa, 0, sizeof(sa));
> +    sa.nl_family = AF_NETLINK;
> +    sa.nl_pid = 0;
> +    sa.nl_groups = 0;
> +
> +    msg.nlmsg_len = NLMSG_SPACE(0);
> +    msg.nlmsg_flags = NLM_F_REQUEST;
> +    if (type == COLO_PROXY_INIT) {
> +        msg.nlmsg_flags |= NLM_F_ACK;
> +    }

I don't think you need the { }?

Ah, yup:
                                                                                
5. Block structure                                                              
                                                                                
Every indented statement is braced apart from blocks that contain just          
one statement.                                                                  

> +    msg.nlmsg_seq = 0;
> +    /* This is untrusty */

Umm, can you be more specific pls?

> +    msg.nlmsg_pid = cps->index;
> +    msg.nlmsg_type = type;
> +
> +    iov.iov_base = &msg;
> +    iov.iov_len = msg.nlmsg_len;
> +
> +    mh.msg_name = &sa;
> +    mh.msg_namelen = sizeof(sa);
> +    mh.msg_iov = &iov;
> +    mh.msg_iovlen = 1;
> +    mh.msg_control = NULL;
> +    mh.msg_controllen = 0;
> +    mh.msg_flags = 0;
> +
> +    ret = sendmsg(cps->sock_fd, &mh, 0);
> +    if (ret <= 0) {
> +        LOG(ERROR, "can't send msg to kernel by netlink: %s",
> +            strerror(errno));
> +    }
> +
> +    return ret;
> +}
> +
> +/* error: return -1, otherwise return 0 */
> +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
> +                               unsigned int timeout_us)
> +{
> +    struct sockaddr_nl sa;
> +    struct iovec iov;
> +    struct msghdr mh = {
> +        .msg_name = &sa,
> +        .msg_namelen = sizeof(sa),
> +        .msg_iov = &iov,
> +        .msg_iovlen = 1,
> +    };
> +    struct timeval tv;
> +    uint32_t size = 16384;
> +    int64_t len = 0;
> +    int ret;
> +
> +    STATE_AO_GC(cps->ao);
> +    uint8_t *tmp = libxl__malloc(NOGC, size);
> +
> +    if (timeout_us) {
> +        tv.tv_sec = timeout_us / 1000000;
> +        tv.tv_usec = timeout_us % 1000000;
> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
> +    }
> +
> +    iov.iov_base = tmp;
> +    iov.iov_len = size;
> +next:
> +    ret = recvmsg(cps->sock_fd, &mh, 0);
> +    if (ret <= 0) {
> +        if (errno != EAGAIN && errno != EWOULDBLOCK)

-EINTR ?

> +            LOGE(ERROR, "can't recv msg from kernel by netlink");
> +        goto err;
> +    }
> +
> +    len += ret;
> +    if (mh.msg_flags & MSG_TRUNC) {
> +        size += 16384;
> +        tmp = libxl__realloc(NOGC, tmp, size);

You really should check 'tmp'.

If this loop continues on for some time the 'size' may be
in milions and this realloc will fail.

> +        iov.iov_base = tmp + len;
> +        iov.iov_len = size - len;
> +        goto next;

> +    }
> +
> +    *buff = tmp;
> +    ret = len;
> +    goto out;
> +
> +err:
> +    free(tmp);
> +    *buff = NULL;
> +
> +out:
> +    if (timeout_us) {
> +        tv.tv_sec = 0;
> +        tv.tv_usec = 0;
> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
> +    }
> +    return ret;
> +}
> +
> +/* ========= colo-proxy: setup and teardown ========== */
> +
> +int colo_proxy_setup(libxl__colo_proxy_state *cps)
> +{
> +    int skfd = 0;
> +    struct sockaddr_nl sa;
> +    struct nlmsghdr *h;
> +    int i = 1;
> +    int ret = ERROR_FAIL;
> +    uint8_t *buff = NULL;
> +    int64_t size;
> +
> +    STATE_AO_GC(cps->ao);
> +
> +    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
> +    if (skfd < 0) {
> +        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
> +        goto out;
> +    }
> +    cps->sock_fd = skfd;
> +    memset(&sa, 0, sizeof(sa));
> +    sa.nl_family = AF_NETLINK;
> +    sa.nl_groups = 0;
> +retry:
> +    sa.nl_pid = i++;
> +
> +    if (i > 10) {
> +        LOG(ERROR, "netlink bind error");
> +        goto out;
> +    }
> +
> +    ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa));
> +    if (ret < 0 && errno == EADDRINUSE) {
> +        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
> +        goto retry;
> +    } else if (ret < 0) {
> +        LOG(ERROR, "netlink bind error");
> +        goto out;
> +    }
> +
> +    cps->index = sa.nl_pid;
> +    ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT);
> +    if (ret < 0) {
> +        goto out;
> +    }

Ditto. You can remove it.

> +    /* receive ack */
> +    size = colo_proxy_recv(cps, &buff, 500000);
> +    if (size < 0) {
> +        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
> +            strerror(errno));
> +        goto out;
> +    }
> +
> +    if (size) {
> +        h = (struct nlmsghdr *)buff;
> +        if (h->nlmsg_type == NLMSG_ERROR) {
> +            /* ack's type is NLMSG_ERROR */
> +            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
> +
> +            if (size - sizeof(*h) < sizeof(*err)) {
> +                LOG(ERROR, "NLMSG_LENGTH is too short");
> +                goto out;
> +            }
> +
> +            if (err->error) {
> +                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
> +                goto out;
> +            }
> +        }
> +    }
> +
> +    ret = 0;
> +
> +out:
> +    free(buff);
> +    if (ret) {
> +        close(cps->sock_fd);
> +        cps->sock_fd = -1;
> +    }
> +    return ret;
> +}
> +
> +void colo_proxy_teardown(libxl__colo_proxy_state *cps)
> +{
> +    if (cps->sock_fd >= 0) {
> +        close(cps->sock_fd);
> +        cps->sock_fd = -1;
> +    }
> +}
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 3af5fdd..3b44b09 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream)
>  }
>  
>  /*----- colo related state structure -----*/
> +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
> +struct libxl__colo_proxy_state {
> +    /* set by caller of colo_proxy_setup */
> +    libxl__ao *ao;
> +
> +    int sock_fd;
> +    int index;
> +};
> +
>  typedef struct libxl__colo_save_state libxl__colo_save_state;
>  struct libxl__colo_save_state {
>      int send_fd;
> @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state {
>      /* private, used by qdisk block replication */
>      bool qdisk_used;
>      bool qdisk_setuped;
> +
> +    /* private, used by colo-proxy */
> +    libxl__colo_proxy_state cps;
>  };
>  
>  /*----- Domain suspend (save) state structure -----*/
> @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state {
>      bool qdisk_setuped;
>      const char *host;
>      const char *port;
> +
> +    /* private, used by colo-proxy */
> +    libxl__colo_proxy_state cps;
>  };
>  
>  struct libxl__domain_create_state {
> -- 
> 2.5.0
> 
> 
>
Wen Congyang March 14, 2016, 9:13 a.m. UTC | #3
On 03/12/2016 06:25 AM, Konrad Rzeszutek Wilk wrote:
>> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
>> +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
>>  #endif
>> diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
>> new file mode 100644
>> index 0000000..e07e640
>> --- /dev/null
>> +++ b/tools/libxl/libxl_colo_proxy.c
>> @@ -0,0 +1,230 @@
>> +/*
>> + * Copyright (C) 2015 FUJITSU LIMITED
> 
> 2016?

Yes, I will check all new files.

>> + * Author: Yang Hongyang <hongyang.yang@easystack.cn>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU Lesser General Public License as published
>> + * by the Free Software Foundation; version 2.1 only. with the special
>> + * exception on linking described in file LICENSE.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU Lesser General Public License for more details.
>> + */
>> +
>> +#include "libxl_osdeps.h" /* must come before any other headers */
>> +
>> +#include "libxl_internal.h"
>> +#include "libxl_colo.h"
>> +#include <linux/netlink.h>
>> +
>> +#define NETLINK_COLO 28
> 
> Can you include a comment why 28? Why not 31415?

OK, will add a comment to describe it.

> 
>> +
>> +enum colo_netlink_op {
>> +    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
>> +    COLO_CHECKPOINT,
>> +    COLO_FAILOVER,
>> +    COLO_PROXY_INIT,
>> +    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
>> +};
>> +
>> +/* ========= colo-proxy: helper functions ========== */
>> +
>> +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
>> +                           uint64_t size, int type)
>> +{
>> +    struct sockaddr_nl sa;
>> +    struct nlmsghdr msg;
>> +    struct iovec iov;
>> +    struct msghdr mh;
>> +    int ret;
>> +
>> +    STATE_AO_GC(cps->ao);
>> +
>> +    memset(&sa, 0, sizeof(sa));
>> +    sa.nl_family = AF_NETLINK;
>> +    sa.nl_pid = 0;
>> +    sa.nl_groups = 0;
>> +
>> +    msg.nlmsg_len = NLMSG_SPACE(0);
>> +    msg.nlmsg_flags = NLM_F_REQUEST;
>> +    if (type == COLO_PROXY_INIT) {
>> +        msg.nlmsg_flags |= NLM_F_ACK;
>> +    }
> 
> I don't think you need the { }?

Yes, will fix it in the next version.

> 
> Ah, yup:
>                                                                                 
> 5. Block structure                                                              
>                                                                                 
> Every indented statement is braced apart from blocks that contain just          
> one statement.                                                                  
> 
>> +    msg.nlmsg_seq = 0;
>> +    /* This is untrusty */
> 
> Umm, can you be more specific pls?
> 
>> +    msg.nlmsg_pid = cps->index;
>> +    msg.nlmsg_type = type;
>> +
>> +    iov.iov_base = &msg;
>> +    iov.iov_len = msg.nlmsg_len;
>> +
>> +    mh.msg_name = &sa;
>> +    mh.msg_namelen = sizeof(sa);
>> +    mh.msg_iov = &iov;
>> +    mh.msg_iovlen = 1;
>> +    mh.msg_control = NULL;
>> +    mh.msg_controllen = 0;
>> +    mh.msg_flags = 0;
>> +
>> +    ret = sendmsg(cps->sock_fd, &mh, 0);
>> +    if (ret <= 0) {
>> +        LOG(ERROR, "can't send msg to kernel by netlink: %s",
>> +            strerror(errno));
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +/* error: return -1, otherwise return 0 */
>> +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
>> +                               unsigned int timeout_us)
>> +{
>> +    struct sockaddr_nl sa;
>> +    struct iovec iov;
>> +    struct msghdr mh = {
>> +        .msg_name = &sa,
>> +        .msg_namelen = sizeof(sa),
>> +        .msg_iov = &iov,
>> +        .msg_iovlen = 1,
>> +    };
>> +    struct timeval tv;
>> +    uint32_t size = 16384;
>> +    int64_t len = 0;
>> +    int ret;
>> +
>> +    STATE_AO_GC(cps->ao);
>> +    uint8_t *tmp = libxl__malloc(NOGC, size);
>> +
>> +    if (timeout_us) {
>> +        tv.tv_sec = timeout_us / 1000000;
>> +        tv.tv_usec = timeout_us % 1000000;
>> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
>> +    }
>> +
>> +    iov.iov_base = tmp;
>> +    iov.iov_len = size;
>> +next:
>> +    ret = recvmsg(cps->sock_fd, &mh, 0);
>> +    if (ret <= 0) {
>> +        if (errno != EAGAIN && errno != EWOULDBLOCK)
> 
> -EINTR ?

IIRC, WAGAIN and EWOULDBLOCK may have different value in some system.
EINTR is not handled here.

> 
>> +            LOGE(ERROR, "can't recv msg from kernel by netlink");
>> +        goto err;
>> +    }
>> +
>> +    len += ret;
>> +    if (mh.msg_flags & MSG_TRUNC) {
>> +        size += 16384;
>> +        tmp = libxl__realloc(NOGC, tmp, size);
> 
> You really should check 'tmp'.
> 
> If this loop continues on for some time the 'size' may be
> in milions and this realloc will fail.

OK, will fix it in the next version.

> 
>> +        iov.iov_base = tmp + len;
>> +        iov.iov_len = size - len;
>> +        goto next;
> 
>> +    }
>> +
>> +    *buff = tmp;
>> +    ret = len;
>> +    goto out;
>> +
>> +err:
>> +    free(tmp);
>> +    *buff = NULL;
>> +
>> +out:
>> +    if (timeout_us) {
>> +        tv.tv_sec = 0;
>> +        tv.tv_usec = 0;
>> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
>> +    }
>> +    return ret;
>> +}
>> +
>> +/* ========= colo-proxy: setup and teardown ========== */
>> +
>> +int colo_proxy_setup(libxl__colo_proxy_state *cps)
>> +{
>> +    int skfd = 0;
>> +    struct sockaddr_nl sa;
>> +    struct nlmsghdr *h;
>> +    int i = 1;
>> +    int ret = ERROR_FAIL;
>> +    uint8_t *buff = NULL;
>> +    int64_t size;
>> +
>> +    STATE_AO_GC(cps->ao);
>> +
>> +    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
>> +    if (skfd < 0) {
>> +        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
>> +        goto out;
>> +    }
>> +    cps->sock_fd = skfd;
>> +    memset(&sa, 0, sizeof(sa));
>> +    sa.nl_family = AF_NETLINK;
>> +    sa.nl_groups = 0;
>> +retry:
>> +    sa.nl_pid = i++;
>> +
>> +    if (i > 10) {
>> +        LOG(ERROR, "netlink bind error");
>> +        goto out;
>> +    }
>> +
>> +    ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa));
>> +    if (ret < 0 && errno == EADDRINUSE) {
>> +        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
>> +        goto retry;
>> +    } else if (ret < 0) {
>> +        LOG(ERROR, "netlink bind error");
>> +        goto out;
>> +    }
>> +
>> +    cps->index = sa.nl_pid;
>> +    ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT);
>> +    if (ret < 0) {
>> +        goto out;
>> +    }
> 
> Ditto. You can remove it.

OK, will check all codes.

Thanks
Wen Congyang

> 
>> +    /* receive ack */
>> +    size = colo_proxy_recv(cps, &buff, 500000);
>> +    if (size < 0) {
>> +        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
>> +            strerror(errno));
>> +        goto out;
>> +    }
>> +
>> +    if (size) {
>> +        h = (struct nlmsghdr *)buff;
>> +        if (h->nlmsg_type == NLMSG_ERROR) {
>> +            /* ack's type is NLMSG_ERROR */
>> +            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
>> +
>> +            if (size - sizeof(*h) < sizeof(*err)) {
>> +                LOG(ERROR, "NLMSG_LENGTH is too short");
>> +                goto out;
>> +            }
>> +
>> +            if (err->error) {
>> +                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
>> +                goto out;
>> +            }
>> +        }
>> +    }
>> +
>> +    ret = 0;
>> +
>> +out:
>> +    free(buff);
>> +    if (ret) {
>> +        close(cps->sock_fd);
>> +        cps->sock_fd = -1;
>> +    }
>> +    return ret;
>> +}
>> +
>> +void colo_proxy_teardown(libxl__colo_proxy_state *cps)
>> +{
>> +    if (cps->sock_fd >= 0) {
>> +        close(cps->sock_fd);
>> +        cps->sock_fd = -1;
>> +    }
>> +}
>> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
>> index 3af5fdd..3b44b09 100644
>> --- a/tools/libxl/libxl_internal.h
>> +++ b/tools/libxl/libxl_internal.h
>> @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream)
>>  }
>>  
>>  /*----- colo related state structure -----*/
>> +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
>> +struct libxl__colo_proxy_state {
>> +    /* set by caller of colo_proxy_setup */
>> +    libxl__ao *ao;
>> +
>> +    int sock_fd;
>> +    int index;
>> +};
>> +
>>  typedef struct libxl__colo_save_state libxl__colo_save_state;
>>  struct libxl__colo_save_state {
>>      int send_fd;
>> @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state {
>>      /* private, used by qdisk block replication */
>>      bool qdisk_used;
>>      bool qdisk_setuped;
>> +
>> +    /* private, used by colo-proxy */
>> +    libxl__colo_proxy_state cps;
>>  };
>>  
>>  /*----- Domain suspend (save) state structure -----*/
>> @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state {
>>      bool qdisk_setuped;
>>      const char *host;
>>      const char *port;
>> +
>> +    /* private, used by colo-proxy */
>> +    libxl__colo_proxy_state cps;
>>  };
>>  
>>  struct libxl__domain_create_state {
>> -- 
>> 2.5.0
>>
>>
>>
> 
> 
> .
>
Changlong Xie March 22, 2016, 3:40 a.m. UTC | #4
On 03/14/2016 05:13 PM, Wen Congyang wrote:
> On 03/12/2016 06:25 AM, Konrad Rzeszutek Wilk wrote:
>>> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
>>> +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
>>>   #endif
>>> diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
>>> new file mode 100644
>>> index 0000000..e07e640
>>> --- /dev/null
>>> +++ b/tools/libxl/libxl_colo_proxy.c
>>> @@ -0,0 +1,230 @@
>>> +/*
>>> + * Copyright (C) 2015 FUJITSU LIMITED
>>
>> 2016?
>
> Yes, I will check all new files.
>
>>> + * Author: Yang Hongyang <hongyang.yang@easystack.cn>
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU Lesser General Public License as published
>>> + * by the Free Software Foundation; version 2.1 only. with the special
>>> + * exception on linking described in file LICENSE.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU Lesser General Public License for more details.
>>> + */
>>> +
>>> +#include "libxl_osdeps.h" /* must come before any other headers */
>>> +
>>> +#include "libxl_internal.h"
>>> +#include "libxl_colo.h"
>>> +#include <linux/netlink.h>
>>> +
>>> +#define NETLINK_COLO 28
>>
>> Can you include a comment why 28? Why not 31415?
>
> OK, will add a comment to describe it.
>
>>
>>> +
>>> +enum colo_netlink_op {
>>> +    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
>>> +    COLO_CHECKPOINT,
>>> +    COLO_FAILOVER,
>>> +    COLO_PROXY_INIT,
>>> +    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
>>> +};
>>> +
>>> +/* ========= colo-proxy: helper functions ========== */
>>> +
>>> +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
>>> +                           uint64_t size, int type)
>>> +{
>>> +    struct sockaddr_nl sa;
>>> +    struct nlmsghdr msg;
>>> +    struct iovec iov;
>>> +    struct msghdr mh;
>>> +    int ret;
>>> +
>>> +    STATE_AO_GC(cps->ao);
>>> +
>>> +    memset(&sa, 0, sizeof(sa));
>>> +    sa.nl_family = AF_NETLINK;
>>> +    sa.nl_pid = 0;
>>> +    sa.nl_groups = 0;
>>> +
>>> +    msg.nlmsg_len = NLMSG_SPACE(0);
>>> +    msg.nlmsg_flags = NLM_F_REQUEST;
>>> +    if (type == COLO_PROXY_INIT) {
>>> +        msg.nlmsg_flags |= NLM_F_ACK;
>>> +    }
>>
>> I don't think you need the { }?
>
> Yes, will fix it in the next version.
>
>>
>> Ah, yup:
>>
>> 5. Block structure
>>
>> Every indented statement is braced apart from blocks that contain just
>> one statement.
>>
>>> +    msg.nlmsg_seq = 0;
>>> +    /* This is untrusty */
>>
>> Umm, can you be more specific pls?
>>
>>> +    msg.nlmsg_pid = cps->index;
>>> +    msg.nlmsg_type = type;
>>> +
>>> +    iov.iov_base = &msg;
>>> +    iov.iov_len = msg.nlmsg_len;
>>> +
>>> +    mh.msg_name = &sa;
>>> +    mh.msg_namelen = sizeof(sa);
>>> +    mh.msg_iov = &iov;
>>> +    mh.msg_iovlen = 1;
>>> +    mh.msg_control = NULL;
>>> +    mh.msg_controllen = 0;
>>> +    mh.msg_flags = 0;
>>> +
>>> +    ret = sendmsg(cps->sock_fd, &mh, 0);
>>> +    if (ret <= 0) {
>>> +        LOG(ERROR, "can't send msg to kernel by netlink: %s",
>>> +            strerror(errno));
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +/* error: return -1, otherwise return 0 */
>>> +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
>>> +                               unsigned int timeout_us)
>>> +{
>>> +    struct sockaddr_nl sa;
>>> +    struct iovec iov;
>>> +    struct msghdr mh = {
>>> +        .msg_name = &sa,
>>> +        .msg_namelen = sizeof(sa),
>>> +        .msg_iov = &iov,
>>> +        .msg_iovlen = 1,
>>> +    };
>>> +    struct timeval tv;
>>> +    uint32_t size = 16384;
>>> +    int64_t len = 0;
>>> +    int ret;
>>> +
>>> +    STATE_AO_GC(cps->ao);
>>> +    uint8_t *tmp = libxl__malloc(NOGC, size);
>>> +
>>> +    if (timeout_us) {
>>> +        tv.tv_sec = timeout_us / 1000000;
>>> +        tv.tv_usec = timeout_us % 1000000;
>>> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
>>> +    }
>>> +
>>> +    iov.iov_base = tmp;
>>> +    iov.iov_len = size;
>>> +next:
>>> +    ret = recvmsg(cps->sock_fd, &mh, 0);
>>> +    if (ret <= 0) {
>>> +        if (errno != EAGAIN && errno != EWOULDBLOCK)
>>
>> -EINTR ?
>
> IIRC, WAGAIN and EWOULDBLOCK may have different value in some system.
> EINTR is not handled here.
>
>>
>>> +            LOGE(ERROR, "can't recv msg from kernel by netlink");
>>> +        goto err;
>>> +    }
>>> +
>>> +    len += ret;
>>> +    if (mh.msg_flags & MSG_TRUNC) {
>>> +        size += 16384;
>>> +        tmp = libxl__realloc(NOGC, tmp, size);
>>
>> You really should check 'tmp'.
>>
>> If this loop continues on for some time the 'size' may be
>> in milions and this realloc will fail.

As i see from the code, libxl_realloc will invoke _exit(-1) if malloc 
failed, so we'll keep it.

Thanks
	-Xie
>
> OK, will fix it in the next version.
>
>>
>>> +        iov.iov_base = tmp + len;
>>> +        iov.iov_len = size - len;
>>> +        goto next;
>>
>>> +    }
>>> +
>>> +    *buff = tmp;
>>> +    ret = len;
>>> +    goto out;
>>> +
>>> +err:
>>> +    free(tmp);
>>> +    *buff = NULL;
>>> +
>>> +out:
>>> +    if (timeout_us) {
>>> +        tv.tv_sec = 0;
>>> +        tv.tv_usec = 0;
>>> +        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
>>> +    }
>>> +    return ret;
>>> +}
>>> +
>>> +/* ========= colo-proxy: setup and teardown ========== */
>>> +
>>> +int colo_proxy_setup(libxl__colo_proxy_state *cps)
>>> +{
>>> +    int skfd = 0;
>>> +    struct sockaddr_nl sa;
>>> +    struct nlmsghdr *h;
>>> +    int i = 1;
>>> +    int ret = ERROR_FAIL;
>>> +    uint8_t *buff = NULL;
>>> +    int64_t size;
>>> +
>>> +    STATE_AO_GC(cps->ao);
>>> +
>>> +    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
>>> +    if (skfd < 0) {
>>> +        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
>>> +        goto out;
>>> +    }
>>> +    cps->sock_fd = skfd;
>>> +    memset(&sa, 0, sizeof(sa));
>>> +    sa.nl_family = AF_NETLINK;
>>> +    sa.nl_groups = 0;
>>> +retry:
>>> +    sa.nl_pid = i++;
>>> +
>>> +    if (i > 10) {
>>> +        LOG(ERROR, "netlink bind error");
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa));
>>> +    if (ret < 0 && errno == EADDRINUSE) {
>>> +        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
>>> +        goto retry;
>>> +    } else if (ret < 0) {
>>> +        LOG(ERROR, "netlink bind error");
>>> +        goto out;
>>> +    }
>>> +
>>> +    cps->index = sa.nl_pid;
>>> +    ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT);
>>> +    if (ret < 0) {
>>> +        goto out;
>>> +    }
>>
>> Ditto. You can remove it.
>
> OK, will check all codes.
>
> Thanks
> Wen Congyang
>
>>
>>> +    /* receive ack */
>>> +    size = colo_proxy_recv(cps, &buff, 500000);
>>> +    if (size < 0) {
>>> +        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
>>> +            strerror(errno));
>>> +        goto out;
>>> +    }
>>> +
>>> +    if (size) {
>>> +        h = (struct nlmsghdr *)buff;
>>> +        if (h->nlmsg_type == NLMSG_ERROR) {
>>> +            /* ack's type is NLMSG_ERROR */
>>> +            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
>>> +
>>> +            if (size - sizeof(*h) < sizeof(*err)) {
>>> +                LOG(ERROR, "NLMSG_LENGTH is too short");
>>> +                goto out;
>>> +            }
>>> +
>>> +            if (err->error) {
>>> +                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
>>> +                goto out;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    ret = 0;
>>> +
>>> +out:
>>> +    free(buff);
>>> +    if (ret) {
>>> +        close(cps->sock_fd);
>>> +        cps->sock_fd = -1;
>>> +    }
>>> +    return ret;
>>> +}
>>> +
>>> +void colo_proxy_teardown(libxl__colo_proxy_state *cps)
>>> +{
>>> +    if (cps->sock_fd >= 0) {
>>> +        close(cps->sock_fd);
>>> +        cps->sock_fd = -1;
>>> +    }
>>> +}
>>> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
>>> index 3af5fdd..3b44b09 100644
>>> --- a/tools/libxl/libxl_internal.h
>>> +++ b/tools/libxl/libxl_internal.h
>>> @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream)
>>>   }
>>>
>>>   /*----- colo related state structure -----*/
>>> +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
>>> +struct libxl__colo_proxy_state {
>>> +    /* set by caller of colo_proxy_setup */
>>> +    libxl__ao *ao;
>>> +
>>> +    int sock_fd;
>>> +    int index;
>>> +};
>>> +
>>>   typedef struct libxl__colo_save_state libxl__colo_save_state;
>>>   struct libxl__colo_save_state {
>>>       int send_fd;
>>> @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state {
>>>       /* private, used by qdisk block replication */
>>>       bool qdisk_used;
>>>       bool qdisk_setuped;
>>> +
>>> +    /* private, used by colo-proxy */
>>> +    libxl__colo_proxy_state cps;
>>>   };
>>>
>>>   /*----- Domain suspend (save) state structure -----*/
>>> @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state {
>>>       bool qdisk_setuped;
>>>       const char *host;
>>>       const char *port;
>>> +
>>> +    /* private, used by colo-proxy */
>>> +    libxl__colo_proxy_state cps;
>>>   };
>>>
>>>   struct libxl__domain_create_state {
>>> --
>>> 2.5.0
>>>
>>>
>>>
>>
>>
>> .
>>
>
> .
>
diff mbox

Patch

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 28d54d0..6fea9e0 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -67,6 +67,7 @@  endif
 LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o
 LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o
 LIBXL_OBJS-y += libxl_colo_qdisk.o
+LIBXL_OBJS-y += libxl_colo_proxy.o
 
 LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o
 LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o
diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h
index b08f672..6d8cd4f 100644
--- a/tools/libxl/libxl_colo.h
+++ b/tools/libxl/libxl_colo.h
@@ -27,4 +27,6 @@  extern void libxl__colo_save_teardown(libxl__egc *egc,
                                       libxl__colo_save_state *css,
                                       int rc);
 
+extern int colo_proxy_setup(libxl__colo_proxy_state *cps);
+extern void colo_proxy_teardown(libxl__colo_proxy_state *cps);
 #endif
diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c
new file mode 100644
index 0000000..e07e640
--- /dev/null
+++ b/tools/libxl/libxl_colo_proxy.c
@@ -0,0 +1,230 @@ 
+/*
+ * Copyright (C) 2015 FUJITSU LIMITED
+ * Author: Yang Hongyang <hongyang.yang@easystack.cn>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#include "libxl_osdeps.h" /* must come before any other headers */
+
+#include "libxl_internal.h"
+#include "libxl_colo.h"
+#include <linux/netlink.h>
+
+#define NETLINK_COLO 28
+
+enum colo_netlink_op {
+    COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1),
+    COLO_CHECKPOINT,
+    COLO_FAILOVER,
+    COLO_PROXY_INIT,
+    COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */
+};
+
+/* ========= colo-proxy: helper functions ========== */
+
+static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff,
+                           uint64_t size, int type)
+{
+    struct sockaddr_nl sa;
+    struct nlmsghdr msg;
+    struct iovec iov;
+    struct msghdr mh;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_pid = 0;
+    sa.nl_groups = 0;
+
+    msg.nlmsg_len = NLMSG_SPACE(0);
+    msg.nlmsg_flags = NLM_F_REQUEST;
+    if (type == COLO_PROXY_INIT) {
+        msg.nlmsg_flags |= NLM_F_ACK;
+    }
+    msg.nlmsg_seq = 0;
+    /* This is untrusty */
+    msg.nlmsg_pid = cps->index;
+    msg.nlmsg_type = type;
+
+    iov.iov_base = &msg;
+    iov.iov_len = msg.nlmsg_len;
+
+    mh.msg_name = &sa;
+    mh.msg_namelen = sizeof(sa);
+    mh.msg_iov = &iov;
+    mh.msg_iovlen = 1;
+    mh.msg_control = NULL;
+    mh.msg_controllen = 0;
+    mh.msg_flags = 0;
+
+    ret = sendmsg(cps->sock_fd, &mh, 0);
+    if (ret <= 0) {
+        LOG(ERROR, "can't send msg to kernel by netlink: %s",
+            strerror(errno));
+    }
+
+    return ret;
+}
+
+/* error: return -1, otherwise return 0 */
+static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff,
+                               unsigned int timeout_us)
+{
+    struct sockaddr_nl sa;
+    struct iovec iov;
+    struct msghdr mh = {
+        .msg_name = &sa,
+        .msg_namelen = sizeof(sa),
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+    };
+    struct timeval tv;
+    uint32_t size = 16384;
+    int64_t len = 0;
+    int ret;
+
+    STATE_AO_GC(cps->ao);
+    uint8_t *tmp = libxl__malloc(NOGC, size);
+
+    if (timeout_us) {
+        tv.tv_sec = timeout_us / 1000000;
+        tv.tv_usec = timeout_us % 1000000;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+
+    iov.iov_base = tmp;
+    iov.iov_len = size;
+next:
+    ret = recvmsg(cps->sock_fd, &mh, 0);
+    if (ret <= 0) {
+        if (errno != EAGAIN && errno != EWOULDBLOCK)
+            LOGE(ERROR, "can't recv msg from kernel by netlink");
+        goto err;
+    }
+
+    len += ret;
+    if (mh.msg_flags & MSG_TRUNC) {
+        size += 16384;
+        tmp = libxl__realloc(NOGC, tmp, size);
+        iov.iov_base = tmp + len;
+        iov.iov_len = size - len;
+        goto next;
+    }
+
+    *buff = tmp;
+    ret = len;
+    goto out;
+
+err:
+    free(tmp);
+    *buff = NULL;
+
+out:
+    if (timeout_us) {
+        tv.tv_sec = 0;
+        tv.tv_usec = 0;
+        setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+    }
+    return ret;
+}
+
+/* ========= colo-proxy: setup and teardown ========== */
+
+int colo_proxy_setup(libxl__colo_proxy_state *cps)
+{
+    int skfd = 0;
+    struct sockaddr_nl sa;
+    struct nlmsghdr *h;
+    int i = 1;
+    int ret = ERROR_FAIL;
+    uint8_t *buff = NULL;
+    int64_t size;
+
+    STATE_AO_GC(cps->ao);
+
+    skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO);
+    if (skfd < 0) {
+        LOG(ERROR, "can not create a netlink socket: %s", strerror(errno));
+        goto out;
+    }
+    cps->sock_fd = skfd;
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+    sa.nl_groups = 0;
+retry:
+    sa.nl_pid = i++;
+
+    if (i > 10) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+
+    ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa));
+    if (ret < 0 && errno == EADDRINUSE) {
+        LOG(ERROR, "colo index %d has already in used", sa.nl_pid);
+        goto retry;
+    } else if (ret < 0) {
+        LOG(ERROR, "netlink bind error");
+        goto out;
+    }
+
+    cps->index = sa.nl_pid;
+    ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT);
+    if (ret < 0) {
+        goto out;
+    }
+    /* receive ack */
+    size = colo_proxy_recv(cps, &buff, 500000);
+    if (size < 0) {
+        LOG(ERROR, "Can't recv msg from kernel by netlink: %s",
+            strerror(errno));
+        goto out;
+    }
+
+    if (size) {
+        h = (struct nlmsghdr *)buff;
+        if (h->nlmsg_type == NLMSG_ERROR) {
+            /* ack's type is NLMSG_ERROR */
+            struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+
+            if (size - sizeof(*h) < sizeof(*err)) {
+                LOG(ERROR, "NLMSG_LENGTH is too short");
+                goto out;
+            }
+
+            if (err->error) {
+                LOG(ERROR, "NLMSG_ERROR contains error %d", err->error);
+                goto out;
+            }
+        }
+    }
+
+    ret = 0;
+
+out:
+    free(buff);
+    if (ret) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+    return ret;
+}
+
+void colo_proxy_teardown(libxl__colo_proxy_state *cps)
+{
+    if (cps->sock_fd >= 0) {
+        close(cps->sock_fd);
+        cps->sock_fd = -1;
+    }
+}
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 3af5fdd..3b44b09 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -3112,6 +3112,15 @@  libxl__stream_read_inuse(const libxl__stream_read_state *stream)
 }
 
 /*----- colo related state structure -----*/
+typedef struct libxl__colo_proxy_state libxl__colo_proxy_state;
+struct libxl__colo_proxy_state {
+    /* set by caller of colo_proxy_setup */
+    libxl__ao *ao;
+
+    int sock_fd;
+    int index;
+};
+
 typedef struct libxl__colo_save_state libxl__colo_save_state;
 struct libxl__colo_save_state {
     int send_fd;
@@ -3126,6 +3135,9 @@  struct libxl__colo_save_state {
     /* private, used by qdisk block replication */
     bool qdisk_used;
     bool qdisk_setuped;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 /*----- Domain suspend (save) state structure -----*/
@@ -3535,6 +3547,9 @@  struct libxl__colo_restore_state {
     bool qdisk_setuped;
     const char *host;
     const char *port;
+
+    /* private, used by colo-proxy */
+    libxl__colo_proxy_state cps;
 };
 
 struct libxl__domain_create_state {