Message ID | 1456109555-28299-27-git-send-email-wency@cn.fujitsu.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Feb 22, 2016 at 10:52:30AM +0800, Wen Congyang wrote: > setup/teardown of COLO proxy module. > we use netlink to communicate with proxy module. > About colo-proxy module: > https://lkml.org/lkml/2015/6/18/32 > How to use: > http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping > > Signed-off-by: Yang Hongyang <hongyang.yang@easystack.cn> > Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> I'm tempted to just ack this patch as well. Wei.
> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps); > +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps); > #endif > diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c > new file mode 100644 > index 0000000..e07e640 > --- /dev/null > +++ b/tools/libxl/libxl_colo_proxy.c > @@ -0,0 +1,230 @@ > +/* > + * Copyright (C) 2015 FUJITSU LIMITED 2016? > + * Author: Yang Hongyang <hongyang.yang@easystack.cn> > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU Lesser General Public License as published > + * by the Free Software Foundation; version 2.1 only. with the special > + * exception on linking described in file LICENSE. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU Lesser General Public License for more details. > + */ > + > +#include "libxl_osdeps.h" /* must come before any other headers */ > + > +#include "libxl_internal.h" > +#include "libxl_colo.h" > +#include <linux/netlink.h> > + > +#define NETLINK_COLO 28 Can you include a comment why 28? Why not 31415? > + > +enum colo_netlink_op { > + COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), > + COLO_CHECKPOINT, > + COLO_FAILOVER, > + COLO_PROXY_INIT, > + COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */ > +}; > + > +/* ========= colo-proxy: helper functions ========== */ > + > +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, > + uint64_t size, int type) > +{ > + struct sockaddr_nl sa; > + struct nlmsghdr msg; > + struct iovec iov; > + struct msghdr mh; > + int ret; > + > + STATE_AO_GC(cps->ao); > + > + memset(&sa, 0, sizeof(sa)); > + sa.nl_family = AF_NETLINK; > + sa.nl_pid = 0; > + sa.nl_groups = 0; > + > + msg.nlmsg_len = NLMSG_SPACE(0); > + msg.nlmsg_flags = NLM_F_REQUEST; > + if (type == COLO_PROXY_INIT) { > + msg.nlmsg_flags |= NLM_F_ACK; > + } I don't think you need the { }? Ah, yup: 5. Block structure Every indented statement is braced apart from blocks that contain just one statement. > + msg.nlmsg_seq = 0; > + /* This is untrusty */ Umm, can you be more specific pls? > + msg.nlmsg_pid = cps->index; > + msg.nlmsg_type = type; > + > + iov.iov_base = &msg; > + iov.iov_len = msg.nlmsg_len; > + > + mh.msg_name = &sa; > + mh.msg_namelen = sizeof(sa); > + mh.msg_iov = &iov; > + mh.msg_iovlen = 1; > + mh.msg_control = NULL; > + mh.msg_controllen = 0; > + mh.msg_flags = 0; > + > + ret = sendmsg(cps->sock_fd, &mh, 0); > + if (ret <= 0) { > + LOG(ERROR, "can't send msg to kernel by netlink: %s", > + strerror(errno)); > + } > + > + return ret; > +} > + > +/* error: return -1, otherwise return 0 */ > +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, > + unsigned int timeout_us) > +{ > + struct sockaddr_nl sa; > + struct iovec iov; > + struct msghdr mh = { > + .msg_name = &sa, > + .msg_namelen = sizeof(sa), > + .msg_iov = &iov, > + .msg_iovlen = 1, > + }; > + struct timeval tv; > + uint32_t size = 16384; > + int64_t len = 0; > + int ret; > + > + STATE_AO_GC(cps->ao); > + uint8_t *tmp = libxl__malloc(NOGC, size); > + > + if (timeout_us) { > + tv.tv_sec = timeout_us / 1000000; > + tv.tv_usec = timeout_us % 1000000; > + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); > + } > + > + iov.iov_base = tmp; > + iov.iov_len = size; > +next: > + ret = recvmsg(cps->sock_fd, &mh, 0); > + if (ret <= 0) { > + if (errno != EAGAIN && errno != EWOULDBLOCK) -EINTR ? > + LOGE(ERROR, "can't recv msg from kernel by netlink"); > + goto err; > + } > + > + len += ret; > + if (mh.msg_flags & MSG_TRUNC) { > + size += 16384; > + tmp = libxl__realloc(NOGC, tmp, size); You really should check 'tmp'. If this loop continues on for some time the 'size' may be in milions and this realloc will fail. > + iov.iov_base = tmp + len; > + iov.iov_len = size - len; > + goto next; > + } > + > + *buff = tmp; > + ret = len; > + goto out; > + > +err: > + free(tmp); > + *buff = NULL; > + > +out: > + if (timeout_us) { > + tv.tv_sec = 0; > + tv.tv_usec = 0; > + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); > + } > + return ret; > +} > + > +/* ========= colo-proxy: setup and teardown ========== */ > + > +int colo_proxy_setup(libxl__colo_proxy_state *cps) > +{ > + int skfd = 0; > + struct sockaddr_nl sa; > + struct nlmsghdr *h; > + int i = 1; > + int ret = ERROR_FAIL; > + uint8_t *buff = NULL; > + int64_t size; > + > + STATE_AO_GC(cps->ao); > + > + skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO); > + if (skfd < 0) { > + LOG(ERROR, "can not create a netlink socket: %s", strerror(errno)); > + goto out; > + } > + cps->sock_fd = skfd; > + memset(&sa, 0, sizeof(sa)); > + sa.nl_family = AF_NETLINK; > + sa.nl_groups = 0; > +retry: > + sa.nl_pid = i++; > + > + if (i > 10) { > + LOG(ERROR, "netlink bind error"); > + goto out; > + } > + > + ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa)); > + if (ret < 0 && errno == EADDRINUSE) { > + LOG(ERROR, "colo index %d has already in used", sa.nl_pid); > + goto retry; > + } else if (ret < 0) { > + LOG(ERROR, "netlink bind error"); > + goto out; > + } > + > + cps->index = sa.nl_pid; > + ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT); > + if (ret < 0) { > + goto out; > + } Ditto. You can remove it. > + /* receive ack */ > + size = colo_proxy_recv(cps, &buff, 500000); > + if (size < 0) { > + LOG(ERROR, "Can't recv msg from kernel by netlink: %s", > + strerror(errno)); > + goto out; > + } > + > + if (size) { > + h = (struct nlmsghdr *)buff; > + if (h->nlmsg_type == NLMSG_ERROR) { > + /* ack's type is NLMSG_ERROR */ > + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); > + > + if (size - sizeof(*h) < sizeof(*err)) { > + LOG(ERROR, "NLMSG_LENGTH is too short"); > + goto out; > + } > + > + if (err->error) { > + LOG(ERROR, "NLMSG_ERROR contains error %d", err->error); > + goto out; > + } > + } > + } > + > + ret = 0; > + > +out: > + free(buff); > + if (ret) { > + close(cps->sock_fd); > + cps->sock_fd = -1; > + } > + return ret; > +} > + > +void colo_proxy_teardown(libxl__colo_proxy_state *cps) > +{ > + if (cps->sock_fd >= 0) { > + close(cps->sock_fd); > + cps->sock_fd = -1; > + } > +} > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h > index 3af5fdd..3b44b09 100644 > --- a/tools/libxl/libxl_internal.h > +++ b/tools/libxl/libxl_internal.h > @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream) > } > > /*----- colo related state structure -----*/ > +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state; > +struct libxl__colo_proxy_state { > + /* set by caller of colo_proxy_setup */ > + libxl__ao *ao; > + > + int sock_fd; > + int index; > +}; > + > typedef struct libxl__colo_save_state libxl__colo_save_state; > struct libxl__colo_save_state { > int send_fd; > @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state { > /* private, used by qdisk block replication */ > bool qdisk_used; > bool qdisk_setuped; > + > + /* private, used by colo-proxy */ > + libxl__colo_proxy_state cps; > }; > > /*----- Domain suspend (save) state structure -----*/ > @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state { > bool qdisk_setuped; > const char *host; > const char *port; > + > + /* private, used by colo-proxy */ > + libxl__colo_proxy_state cps; > }; > > struct libxl__domain_create_state { > -- > 2.5.0 > > >
On 03/12/2016 06:25 AM, Konrad Rzeszutek Wilk wrote: >> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps); >> +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps); >> #endif >> diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c >> new file mode 100644 >> index 0000000..e07e640 >> --- /dev/null >> +++ b/tools/libxl/libxl_colo_proxy.c >> @@ -0,0 +1,230 @@ >> +/* >> + * Copyright (C) 2015 FUJITSU LIMITED > > 2016? Yes, I will check all new files. >> + * Author: Yang Hongyang <hongyang.yang@easystack.cn> >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU Lesser General Public License as published >> + * by the Free Software Foundation; version 2.1 only. with the special >> + * exception on linking described in file LICENSE. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU Lesser General Public License for more details. >> + */ >> + >> +#include "libxl_osdeps.h" /* must come before any other headers */ >> + >> +#include "libxl_internal.h" >> +#include "libxl_colo.h" >> +#include <linux/netlink.h> >> + >> +#define NETLINK_COLO 28 > > Can you include a comment why 28? Why not 31415? OK, will add a comment to describe it. > >> + >> +enum colo_netlink_op { >> + COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), >> + COLO_CHECKPOINT, >> + COLO_FAILOVER, >> + COLO_PROXY_INIT, >> + COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */ >> +}; >> + >> +/* ========= colo-proxy: helper functions ========== */ >> + >> +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, >> + uint64_t size, int type) >> +{ >> + struct sockaddr_nl sa; >> + struct nlmsghdr msg; >> + struct iovec iov; >> + struct msghdr mh; >> + int ret; >> + >> + STATE_AO_GC(cps->ao); >> + >> + memset(&sa, 0, sizeof(sa)); >> + sa.nl_family = AF_NETLINK; >> + sa.nl_pid = 0; >> + sa.nl_groups = 0; >> + >> + msg.nlmsg_len = NLMSG_SPACE(0); >> + msg.nlmsg_flags = NLM_F_REQUEST; >> + if (type == COLO_PROXY_INIT) { >> + msg.nlmsg_flags |= NLM_F_ACK; >> + } > > I don't think you need the { }? Yes, will fix it in the next version. > > Ah, yup: > > 5. Block structure > > Every indented statement is braced apart from blocks that contain just > one statement. > >> + msg.nlmsg_seq = 0; >> + /* This is untrusty */ > > Umm, can you be more specific pls? > >> + msg.nlmsg_pid = cps->index; >> + msg.nlmsg_type = type; >> + >> + iov.iov_base = &msg; >> + iov.iov_len = msg.nlmsg_len; >> + >> + mh.msg_name = &sa; >> + mh.msg_namelen = sizeof(sa); >> + mh.msg_iov = &iov; >> + mh.msg_iovlen = 1; >> + mh.msg_control = NULL; >> + mh.msg_controllen = 0; >> + mh.msg_flags = 0; >> + >> + ret = sendmsg(cps->sock_fd, &mh, 0); >> + if (ret <= 0) { >> + LOG(ERROR, "can't send msg to kernel by netlink: %s", >> + strerror(errno)); >> + } >> + >> + return ret; >> +} >> + >> +/* error: return -1, otherwise return 0 */ >> +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, >> + unsigned int timeout_us) >> +{ >> + struct sockaddr_nl sa; >> + struct iovec iov; >> + struct msghdr mh = { >> + .msg_name = &sa, >> + .msg_namelen = sizeof(sa), >> + .msg_iov = &iov, >> + .msg_iovlen = 1, >> + }; >> + struct timeval tv; >> + uint32_t size = 16384; >> + int64_t len = 0; >> + int ret; >> + >> + STATE_AO_GC(cps->ao); >> + uint8_t *tmp = libxl__malloc(NOGC, size); >> + >> + if (timeout_us) { >> + tv.tv_sec = timeout_us / 1000000; >> + tv.tv_usec = timeout_us % 1000000; >> + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); >> + } >> + >> + iov.iov_base = tmp; >> + iov.iov_len = size; >> +next: >> + ret = recvmsg(cps->sock_fd, &mh, 0); >> + if (ret <= 0) { >> + if (errno != EAGAIN && errno != EWOULDBLOCK) > > -EINTR ? IIRC, WAGAIN and EWOULDBLOCK may have different value in some system. EINTR is not handled here. > >> + LOGE(ERROR, "can't recv msg from kernel by netlink"); >> + goto err; >> + } >> + >> + len += ret; >> + if (mh.msg_flags & MSG_TRUNC) { >> + size += 16384; >> + tmp = libxl__realloc(NOGC, tmp, size); > > You really should check 'tmp'. > > If this loop continues on for some time the 'size' may be > in milions and this realloc will fail. OK, will fix it in the next version. > >> + iov.iov_base = tmp + len; >> + iov.iov_len = size - len; >> + goto next; > >> + } >> + >> + *buff = tmp; >> + ret = len; >> + goto out; >> + >> +err: >> + free(tmp); >> + *buff = NULL; >> + >> +out: >> + if (timeout_us) { >> + tv.tv_sec = 0; >> + tv.tv_usec = 0; >> + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); >> + } >> + return ret; >> +} >> + >> +/* ========= colo-proxy: setup and teardown ========== */ >> + >> +int colo_proxy_setup(libxl__colo_proxy_state *cps) >> +{ >> + int skfd = 0; >> + struct sockaddr_nl sa; >> + struct nlmsghdr *h; >> + int i = 1; >> + int ret = ERROR_FAIL; >> + uint8_t *buff = NULL; >> + int64_t size; >> + >> + STATE_AO_GC(cps->ao); >> + >> + skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO); >> + if (skfd < 0) { >> + LOG(ERROR, "can not create a netlink socket: %s", strerror(errno)); >> + goto out; >> + } >> + cps->sock_fd = skfd; >> + memset(&sa, 0, sizeof(sa)); >> + sa.nl_family = AF_NETLINK; >> + sa.nl_groups = 0; >> +retry: >> + sa.nl_pid = i++; >> + >> + if (i > 10) { >> + LOG(ERROR, "netlink bind error"); >> + goto out; >> + } >> + >> + ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa)); >> + if (ret < 0 && errno == EADDRINUSE) { >> + LOG(ERROR, "colo index %d has already in used", sa.nl_pid); >> + goto retry; >> + } else if (ret < 0) { >> + LOG(ERROR, "netlink bind error"); >> + goto out; >> + } >> + >> + cps->index = sa.nl_pid; >> + ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT); >> + if (ret < 0) { >> + goto out; >> + } > > Ditto. You can remove it. OK, will check all codes. Thanks Wen Congyang > >> + /* receive ack */ >> + size = colo_proxy_recv(cps, &buff, 500000); >> + if (size < 0) { >> + LOG(ERROR, "Can't recv msg from kernel by netlink: %s", >> + strerror(errno)); >> + goto out; >> + } >> + >> + if (size) { >> + h = (struct nlmsghdr *)buff; >> + if (h->nlmsg_type == NLMSG_ERROR) { >> + /* ack's type is NLMSG_ERROR */ >> + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); >> + >> + if (size - sizeof(*h) < sizeof(*err)) { >> + LOG(ERROR, "NLMSG_LENGTH is too short"); >> + goto out; >> + } >> + >> + if (err->error) { >> + LOG(ERROR, "NLMSG_ERROR contains error %d", err->error); >> + goto out; >> + } >> + } >> + } >> + >> + ret = 0; >> + >> +out: >> + free(buff); >> + if (ret) { >> + close(cps->sock_fd); >> + cps->sock_fd = -1; >> + } >> + return ret; >> +} >> + >> +void colo_proxy_teardown(libxl__colo_proxy_state *cps) >> +{ >> + if (cps->sock_fd >= 0) { >> + close(cps->sock_fd); >> + cps->sock_fd = -1; >> + } >> +} >> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h >> index 3af5fdd..3b44b09 100644 >> --- a/tools/libxl/libxl_internal.h >> +++ b/tools/libxl/libxl_internal.h >> @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream) >> } >> >> /*----- colo related state structure -----*/ >> +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state; >> +struct libxl__colo_proxy_state { >> + /* set by caller of colo_proxy_setup */ >> + libxl__ao *ao; >> + >> + int sock_fd; >> + int index; >> +}; >> + >> typedef struct libxl__colo_save_state libxl__colo_save_state; >> struct libxl__colo_save_state { >> int send_fd; >> @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state { >> /* private, used by qdisk block replication */ >> bool qdisk_used; >> bool qdisk_setuped; >> + >> + /* private, used by colo-proxy */ >> + libxl__colo_proxy_state cps; >> }; >> >> /*----- Domain suspend (save) state structure -----*/ >> @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state { >> bool qdisk_setuped; >> const char *host; >> const char *port; >> + >> + /* private, used by colo-proxy */ >> + libxl__colo_proxy_state cps; >> }; >> >> struct libxl__domain_create_state { >> -- >> 2.5.0 >> >> >> > > > . >
On 03/14/2016 05:13 PM, Wen Congyang wrote: > On 03/12/2016 06:25 AM, Konrad Rzeszutek Wilk wrote: >>> +extern int colo_proxy_setup(libxl__colo_proxy_state *cps); >>> +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps); >>> #endif >>> diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c >>> new file mode 100644 >>> index 0000000..e07e640 >>> --- /dev/null >>> +++ b/tools/libxl/libxl_colo_proxy.c >>> @@ -0,0 +1,230 @@ >>> +/* >>> + * Copyright (C) 2015 FUJITSU LIMITED >> >> 2016? > > Yes, I will check all new files. > >>> + * Author: Yang Hongyang <hongyang.yang@easystack.cn> >>> + * >>> + * This program is free software; you can redistribute it and/or modify >>> + * it under the terms of the GNU Lesser General Public License as published >>> + * by the Free Software Foundation; version 2.1 only. with the special >>> + * exception on linking described in file LICENSE. >>> + * >>> + * This program is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>> + * GNU Lesser General Public License for more details. >>> + */ >>> + >>> +#include "libxl_osdeps.h" /* must come before any other headers */ >>> + >>> +#include "libxl_internal.h" >>> +#include "libxl_colo.h" >>> +#include <linux/netlink.h> >>> + >>> +#define NETLINK_COLO 28 >> >> Can you include a comment why 28? Why not 31415? > > OK, will add a comment to describe it. > >> >>> + >>> +enum colo_netlink_op { >>> + COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), >>> + COLO_CHECKPOINT, >>> + COLO_FAILOVER, >>> + COLO_PROXY_INIT, >>> + COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */ >>> +}; >>> + >>> +/* ========= colo-proxy: helper functions ========== */ >>> + >>> +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, >>> + uint64_t size, int type) >>> +{ >>> + struct sockaddr_nl sa; >>> + struct nlmsghdr msg; >>> + struct iovec iov; >>> + struct msghdr mh; >>> + int ret; >>> + >>> + STATE_AO_GC(cps->ao); >>> + >>> + memset(&sa, 0, sizeof(sa)); >>> + sa.nl_family = AF_NETLINK; >>> + sa.nl_pid = 0; >>> + sa.nl_groups = 0; >>> + >>> + msg.nlmsg_len = NLMSG_SPACE(0); >>> + msg.nlmsg_flags = NLM_F_REQUEST; >>> + if (type == COLO_PROXY_INIT) { >>> + msg.nlmsg_flags |= NLM_F_ACK; >>> + } >> >> I don't think you need the { }? > > Yes, will fix it in the next version. > >> >> Ah, yup: >> >> 5. Block structure >> >> Every indented statement is braced apart from blocks that contain just >> one statement. >> >>> + msg.nlmsg_seq = 0; >>> + /* This is untrusty */ >> >> Umm, can you be more specific pls? >> >>> + msg.nlmsg_pid = cps->index; >>> + msg.nlmsg_type = type; >>> + >>> + iov.iov_base = &msg; >>> + iov.iov_len = msg.nlmsg_len; >>> + >>> + mh.msg_name = &sa; >>> + mh.msg_namelen = sizeof(sa); >>> + mh.msg_iov = &iov; >>> + mh.msg_iovlen = 1; >>> + mh.msg_control = NULL; >>> + mh.msg_controllen = 0; >>> + mh.msg_flags = 0; >>> + >>> + ret = sendmsg(cps->sock_fd, &mh, 0); >>> + if (ret <= 0) { >>> + LOG(ERROR, "can't send msg to kernel by netlink: %s", >>> + strerror(errno)); >>> + } >>> + >>> + return ret; >>> +} >>> + >>> +/* error: return -1, otherwise return 0 */ >>> +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, >>> + unsigned int timeout_us) >>> +{ >>> + struct sockaddr_nl sa; >>> + struct iovec iov; >>> + struct msghdr mh = { >>> + .msg_name = &sa, >>> + .msg_namelen = sizeof(sa), >>> + .msg_iov = &iov, >>> + .msg_iovlen = 1, >>> + }; >>> + struct timeval tv; >>> + uint32_t size = 16384; >>> + int64_t len = 0; >>> + int ret; >>> + >>> + STATE_AO_GC(cps->ao); >>> + uint8_t *tmp = libxl__malloc(NOGC, size); >>> + >>> + if (timeout_us) { >>> + tv.tv_sec = timeout_us / 1000000; >>> + tv.tv_usec = timeout_us % 1000000; >>> + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); >>> + } >>> + >>> + iov.iov_base = tmp; >>> + iov.iov_len = size; >>> +next: >>> + ret = recvmsg(cps->sock_fd, &mh, 0); >>> + if (ret <= 0) { >>> + if (errno != EAGAIN && errno != EWOULDBLOCK) >> >> -EINTR ? > > IIRC, WAGAIN and EWOULDBLOCK may have different value in some system. > EINTR is not handled here. > >> >>> + LOGE(ERROR, "can't recv msg from kernel by netlink"); >>> + goto err; >>> + } >>> + >>> + len += ret; >>> + if (mh.msg_flags & MSG_TRUNC) { >>> + size += 16384; >>> + tmp = libxl__realloc(NOGC, tmp, size); >> >> You really should check 'tmp'. >> >> If this loop continues on for some time the 'size' may be >> in milions and this realloc will fail. As i see from the code, libxl_realloc will invoke _exit(-1) if malloc failed, so we'll keep it. Thanks -Xie > > OK, will fix it in the next version. > >> >>> + iov.iov_base = tmp + len; >>> + iov.iov_len = size - len; >>> + goto next; >> >>> + } >>> + >>> + *buff = tmp; >>> + ret = len; >>> + goto out; >>> + >>> +err: >>> + free(tmp); >>> + *buff = NULL; >>> + >>> +out: >>> + if (timeout_us) { >>> + tv.tv_sec = 0; >>> + tv.tv_usec = 0; >>> + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); >>> + } >>> + return ret; >>> +} >>> + >>> +/* ========= colo-proxy: setup and teardown ========== */ >>> + >>> +int colo_proxy_setup(libxl__colo_proxy_state *cps) >>> +{ >>> + int skfd = 0; >>> + struct sockaddr_nl sa; >>> + struct nlmsghdr *h; >>> + int i = 1; >>> + int ret = ERROR_FAIL; >>> + uint8_t *buff = NULL; >>> + int64_t size; >>> + >>> + STATE_AO_GC(cps->ao); >>> + >>> + skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO); >>> + if (skfd < 0) { >>> + LOG(ERROR, "can not create a netlink socket: %s", strerror(errno)); >>> + goto out; >>> + } >>> + cps->sock_fd = skfd; >>> + memset(&sa, 0, sizeof(sa)); >>> + sa.nl_family = AF_NETLINK; >>> + sa.nl_groups = 0; >>> +retry: >>> + sa.nl_pid = i++; >>> + >>> + if (i > 10) { >>> + LOG(ERROR, "netlink bind error"); >>> + goto out; >>> + } >>> + >>> + ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa)); >>> + if (ret < 0 && errno == EADDRINUSE) { >>> + LOG(ERROR, "colo index %d has already in used", sa.nl_pid); >>> + goto retry; >>> + } else if (ret < 0) { >>> + LOG(ERROR, "netlink bind error"); >>> + goto out; >>> + } >>> + >>> + cps->index = sa.nl_pid; >>> + ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT); >>> + if (ret < 0) { >>> + goto out; >>> + } >> >> Ditto. You can remove it. > > OK, will check all codes. > > Thanks > Wen Congyang > >> >>> + /* receive ack */ >>> + size = colo_proxy_recv(cps, &buff, 500000); >>> + if (size < 0) { >>> + LOG(ERROR, "Can't recv msg from kernel by netlink: %s", >>> + strerror(errno)); >>> + goto out; >>> + } >>> + >>> + if (size) { >>> + h = (struct nlmsghdr *)buff; >>> + if (h->nlmsg_type == NLMSG_ERROR) { >>> + /* ack's type is NLMSG_ERROR */ >>> + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); >>> + >>> + if (size - sizeof(*h) < sizeof(*err)) { >>> + LOG(ERROR, "NLMSG_LENGTH is too short"); >>> + goto out; >>> + } >>> + >>> + if (err->error) { >>> + LOG(ERROR, "NLMSG_ERROR contains error %d", err->error); >>> + goto out; >>> + } >>> + } >>> + } >>> + >>> + ret = 0; >>> + >>> +out: >>> + free(buff); >>> + if (ret) { >>> + close(cps->sock_fd); >>> + cps->sock_fd = -1; >>> + } >>> + return ret; >>> +} >>> + >>> +void colo_proxy_teardown(libxl__colo_proxy_state *cps) >>> +{ >>> + if (cps->sock_fd >= 0) { >>> + close(cps->sock_fd); >>> + cps->sock_fd = -1; >>> + } >>> +} >>> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h >>> index 3af5fdd..3b44b09 100644 >>> --- a/tools/libxl/libxl_internal.h >>> +++ b/tools/libxl/libxl_internal.h >>> @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream) >>> } >>> >>> /*----- colo related state structure -----*/ >>> +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state; >>> +struct libxl__colo_proxy_state { >>> + /* set by caller of colo_proxy_setup */ >>> + libxl__ao *ao; >>> + >>> + int sock_fd; >>> + int index; >>> +}; >>> + >>> typedef struct libxl__colo_save_state libxl__colo_save_state; >>> struct libxl__colo_save_state { >>> int send_fd; >>> @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state { >>> /* private, used by qdisk block replication */ >>> bool qdisk_used; >>> bool qdisk_setuped; >>> + >>> + /* private, used by colo-proxy */ >>> + libxl__colo_proxy_state cps; >>> }; >>> >>> /*----- Domain suspend (save) state structure -----*/ >>> @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state { >>> bool qdisk_setuped; >>> const char *host; >>> const char *port; >>> + >>> + /* private, used by colo-proxy */ >>> + libxl__colo_proxy_state cps; >>> }; >>> >>> struct libxl__domain_create_state { >>> -- >>> 2.5.0 >>> >>> >>> >> >> >> . >> > > . >
diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile index 28d54d0..6fea9e0 100644 --- a/tools/libxl/Makefile +++ b/tools/libxl/Makefile @@ -67,6 +67,7 @@ endif LIBXL_OBJS-y += libxl_remus.o libxl_checkpoint_device.o libxl_remus_disk_drbd.o LIBXL_OBJS-y += libxl_colo_restore.o libxl_colo_save.o LIBXL_OBJS-y += libxl_colo_qdisk.o +LIBXL_OBJS-y += libxl_colo_proxy.o LIBXL_OBJS-$(CONFIG_X86) += libxl_cpuid.o libxl_x86.o libxl_psr.o LIBXL_OBJS-$(CONFIG_ARM) += libxl_nocpuid.o libxl_arm.o libxl_libfdt_compat.o diff --git a/tools/libxl/libxl_colo.h b/tools/libxl/libxl_colo.h index b08f672..6d8cd4f 100644 --- a/tools/libxl/libxl_colo.h +++ b/tools/libxl/libxl_colo.h @@ -27,4 +27,6 @@ extern void libxl__colo_save_teardown(libxl__egc *egc, libxl__colo_save_state *css, int rc); +extern int colo_proxy_setup(libxl__colo_proxy_state *cps); +extern void colo_proxy_teardown(libxl__colo_proxy_state *cps); #endif diff --git a/tools/libxl/libxl_colo_proxy.c b/tools/libxl/libxl_colo_proxy.c new file mode 100644 index 0000000..e07e640 --- /dev/null +++ b/tools/libxl/libxl_colo_proxy.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2015 FUJITSU LIMITED + * Author: Yang Hongyang <hongyang.yang@easystack.cn> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "libxl_osdeps.h" /* must come before any other headers */ + +#include "libxl_internal.h" +#include "libxl_colo.h" +#include <linux/netlink.h> + +#define NETLINK_COLO 28 + +enum colo_netlink_op { + COLO_QUERY_CHECKPOINT = (NLMSG_MIN_TYPE + 1), + COLO_CHECKPOINT, + COLO_FAILOVER, + COLO_PROXY_INIT, + COLO_PROXY_RESET, /* UNUSED, will be used for continuous FT */ +}; + +/* ========= colo-proxy: helper functions ========== */ + +static int colo_proxy_send(libxl__colo_proxy_state *cps, uint8_t *buff, + uint64_t size, int type) +{ + struct sockaddr_nl sa; + struct nlmsghdr msg; + struct iovec iov; + struct msghdr mh; + int ret; + + STATE_AO_GC(cps->ao); + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + sa.nl_pid = 0; + sa.nl_groups = 0; + + msg.nlmsg_len = NLMSG_SPACE(0); + msg.nlmsg_flags = NLM_F_REQUEST; + if (type == COLO_PROXY_INIT) { + msg.nlmsg_flags |= NLM_F_ACK; + } + msg.nlmsg_seq = 0; + /* This is untrusty */ + msg.nlmsg_pid = cps->index; + msg.nlmsg_type = type; + + iov.iov_base = &msg; + iov.iov_len = msg.nlmsg_len; + + mh.msg_name = &sa; + mh.msg_namelen = sizeof(sa); + mh.msg_iov = &iov; + mh.msg_iovlen = 1; + mh.msg_control = NULL; + mh.msg_controllen = 0; + mh.msg_flags = 0; + + ret = sendmsg(cps->sock_fd, &mh, 0); + if (ret <= 0) { + LOG(ERROR, "can't send msg to kernel by netlink: %s", + strerror(errno)); + } + + return ret; +} + +/* error: return -1, otherwise return 0 */ +static int64_t colo_proxy_recv(libxl__colo_proxy_state *cps, uint8_t **buff, + unsigned int timeout_us) +{ + struct sockaddr_nl sa; + struct iovec iov; + struct msghdr mh = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + struct timeval tv; + uint32_t size = 16384; + int64_t len = 0; + int ret; + + STATE_AO_GC(cps->ao); + uint8_t *tmp = libxl__malloc(NOGC, size); + + if (timeout_us) { + tv.tv_sec = timeout_us / 1000000; + tv.tv_usec = timeout_us % 1000000; + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + } + + iov.iov_base = tmp; + iov.iov_len = size; +next: + ret = recvmsg(cps->sock_fd, &mh, 0); + if (ret <= 0) { + if (errno != EAGAIN && errno != EWOULDBLOCK) + LOGE(ERROR, "can't recv msg from kernel by netlink"); + goto err; + } + + len += ret; + if (mh.msg_flags & MSG_TRUNC) { + size += 16384; + tmp = libxl__realloc(NOGC, tmp, size); + iov.iov_base = tmp + len; + iov.iov_len = size - len; + goto next; + } + + *buff = tmp; + ret = len; + goto out; + +err: + free(tmp); + *buff = NULL; + +out: + if (timeout_us) { + tv.tv_sec = 0; + tv.tv_usec = 0; + setsockopt(cps->sock_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + } + return ret; +} + +/* ========= colo-proxy: setup and teardown ========== */ + +int colo_proxy_setup(libxl__colo_proxy_state *cps) +{ + int skfd = 0; + struct sockaddr_nl sa; + struct nlmsghdr *h; + int i = 1; + int ret = ERROR_FAIL; + uint8_t *buff = NULL; + int64_t size; + + STATE_AO_GC(cps->ao); + + skfd = socket(PF_NETLINK, SOCK_RAW, NETLINK_COLO); + if (skfd < 0) { + LOG(ERROR, "can not create a netlink socket: %s", strerror(errno)); + goto out; + } + cps->sock_fd = skfd; + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + sa.nl_groups = 0; +retry: + sa.nl_pid = i++; + + if (i > 10) { + LOG(ERROR, "netlink bind error"); + goto out; + } + + ret = bind(skfd, (struct sockaddr *)&sa, sizeof(sa)); + if (ret < 0 && errno == EADDRINUSE) { + LOG(ERROR, "colo index %d has already in used", sa.nl_pid); + goto retry; + } else if (ret < 0) { + LOG(ERROR, "netlink bind error"); + goto out; + } + + cps->index = sa.nl_pid; + ret = colo_proxy_send(cps, NULL, 0, COLO_PROXY_INIT); + if (ret < 0) { + goto out; + } + /* receive ack */ + size = colo_proxy_recv(cps, &buff, 500000); + if (size < 0) { + LOG(ERROR, "Can't recv msg from kernel by netlink: %s", + strerror(errno)); + goto out; + } + + if (size) { + h = (struct nlmsghdr *)buff; + if (h->nlmsg_type == NLMSG_ERROR) { + /* ack's type is NLMSG_ERROR */ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + + if (size - sizeof(*h) < sizeof(*err)) { + LOG(ERROR, "NLMSG_LENGTH is too short"); + goto out; + } + + if (err->error) { + LOG(ERROR, "NLMSG_ERROR contains error %d", err->error); + goto out; + } + } + } + + ret = 0; + +out: + free(buff); + if (ret) { + close(cps->sock_fd); + cps->sock_fd = -1; + } + return ret; +} + +void colo_proxy_teardown(libxl__colo_proxy_state *cps) +{ + if (cps->sock_fd >= 0) { + close(cps->sock_fd); + cps->sock_fd = -1; + } +} diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 3af5fdd..3b44b09 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -3112,6 +3112,15 @@ libxl__stream_read_inuse(const libxl__stream_read_state *stream) } /*----- colo related state structure -----*/ +typedef struct libxl__colo_proxy_state libxl__colo_proxy_state; +struct libxl__colo_proxy_state { + /* set by caller of colo_proxy_setup */ + libxl__ao *ao; + + int sock_fd; + int index; +}; + typedef struct libxl__colo_save_state libxl__colo_save_state; struct libxl__colo_save_state { int send_fd; @@ -3126,6 +3135,9 @@ struct libxl__colo_save_state { /* private, used by qdisk block replication */ bool qdisk_used; bool qdisk_setuped; + + /* private, used by colo-proxy */ + libxl__colo_proxy_state cps; }; /*----- Domain suspend (save) state structure -----*/ @@ -3535,6 +3547,9 @@ struct libxl__colo_restore_state { bool qdisk_setuped; const char *host; const char *port; + + /* private, used by colo-proxy */ + libxl__colo_proxy_state cps; }; struct libxl__domain_create_state {