@@ -767,149 +767,6 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
trace_qemu_rdma_dump_gid(who, sgid, dgid);
}
-/*
- * As of now, IPv6 over RoCE / iWARP is not supported by linux.
- * We will try the next addrinfo struct, and fail if there are
- * no other valid addresses to bind against.
- *
- * If user is listening on '[::]', then we will not have a opened a device
- * yet and have no way of verifying if the device is RoCE or not.
- *
- * In this case, the source VM will throw an error for ALL types of
- * connections (both IPv4 and IPv6) if the destination machine does not have
- * a regular infiniband network available for use.
- *
- * The only way to guarantee that an error is thrown for broken kernels is
- * for the management software to choose a *specific* interface at bind time
- * and validate what time of hardware it is.
- *
- * Unfortunately, this puts the user in a fix:
- *
- * If the source VM connects with an IPv4 address without knowing that the
- * destination has bound to '[::]' the migration will unconditionally fail
- * unless the management software is explicitly listening on the IPv4
- * address while using a RoCE-based device.
- *
- * If the source VM connects with an IPv6 address, then we're OK because we can
- * throw an error on the source (and similarly on the destination).
- *
- * But in mixed environments, this will be broken for a while until it is fixed
- * inside linux.
- *
- * We do provide a *tiny* bit of help in this function: We can list all of the
- * devices in the system and check to see if all the devices are RoCE or
- * Infiniband.
- *
- * If we detect that we have a *pure* RoCE environment, then we can safely
- * thrown an error even if the management software has specified '[::]' as the
- * bind address.
- *
- * However, if there is are multiple hetergeneous devices, then we cannot make
- * this assumption and the user just has to be sure they know what they are
- * doing.
- *
- * Patches are being reviewed on linux-rdma.
- */
-static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
-{
- /* This bug only exists in linux, to our knowledge. */
-#ifdef CONFIG_LINUX
- struct ibv_port_attr port_attr;
-
- /*
- * Verbs are only NULL if management has bound to '[::]'.
- *
- * Let's iterate through all the devices and see if there any pure IB
- * devices (non-ethernet).
- *
- * If not, then we can safely proceed with the migration.
- * Otherwise, there are no guarantees until the bug is fixed in linux.
- */
- if (!verbs) {
- int num_devices;
- struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
- bool roce_found = false;
- bool ib_found = false;
-
- for (int x = 0; x < num_devices; x++) {
- verbs = ibv_open_device(dev_list[x]);
- /*
- * ibv_open_device() is not documented to set errno. If
- * it does, it's somebody else's doc bug. If it doesn't,
- * the use of errno below is wrong.
- * TODO Find out whether ibv_open_device() sets errno.
- */
- if (!verbs) {
- if (errno == EPERM) {
- continue;
- } else {
- error_setg_errno(errp, errno,
- "could not open RDMA device context");
- return -1;
- }
- }
-
- if (ibv_query_port(verbs, 1, &port_attr)) {
- ibv_close_device(verbs);
- error_setg(errp,
- "RDMA ERROR: Could not query initial IB port");
- return -1;
- }
-
- if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
- ib_found = true;
- } else if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
- roce_found = true;
- }
-
- ibv_close_device(verbs);
-
- }
-
- if (roce_found) {
- if (ib_found) {
- warn_report("migrations may fail:"
- " IPv6 over RoCE / iWARP in linux"
- " is broken. But since you appear to have a"
- " mixed RoCE / IB environment, be sure to only"
- " migrate over the IB fabric until the kernel "
- " fixes the bug.");
- } else {
- error_setg(errp, "RDMA ERROR: "
- "You only have RoCE / iWARP devices in your systems"
- " and your management software has specified '[::]'"
- ", but IPv6 over RoCE / iWARP is not supported in Linux.");
- return -1;
- }
- }
-
- return 0;
- }
-
- /*
- * If we have a verbs context, that means that some other than '[::]' was
- * used by the management software for binding. In which case we can
- * actually warn the user about a potentially broken kernel.
- */
-
- /* IB ports start with 1, not 0 */
- if (ibv_query_port(verbs, 1, &port_attr)) {
- error_setg(errp, "RDMA ERROR: Could not query initial IB port");
- return -1;
- }
-
- if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
- error_setg(errp, "RDMA ERROR: "
- "Linux kernel's RoCE / iWARP does not support IPv6 "
- "(but patches on linux-rdma in progress)");
- return -1;
- }
-
-#endif
-
- return 0;
-}
-
/*
* Figure out which RDMA device corresponds to the requested IP hostname
* Also create the initial connection manager identifiers for opening
@@ -955,7 +812,6 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
/* Try all addresses, saving the first error in @err */
for (struct rdma_addrinfo *e = res; e != NULL; e = e->ai_next) {
- Error **local_errp = err ? NULL : &err;
inet_ntop(e->ai_family,
&((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
@@ -964,13 +820,6 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr,
RDMA_RESOLVE_TIMEOUT_MS);
if (ret >= 0) {
- if (e->ai_family == AF_INET6) {
- ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs,
- local_errp);
- if (ret < 0) {
- continue;
- }
- }
error_free(err);
goto route;
}
@@ -2663,7 +2512,6 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
/* Try all addresses, saving the first error in @err */
for (e = res; e != NULL; e = e->ai_next) {
- Error **local_errp = err ? NULL : &err;
inet_ntop(e->ai_family,
&((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
@@ -2672,13 +2520,6 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
if (ret < 0) {
continue;
}
- if (e->ai_family == AF_INET6) {
- ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs,
- local_errp);
- if (ret < 0) {
- continue;
- }
- }
error_free(err);
break;
}