diff mbox series

[PULL,40/65] migration/rdma: Fix error handling around rdma_getaddrinfo()

Message ID 20231011092203.1266-41-quintela@redhat.com (mailing list archive)
State New, archived
Headers show
Series [PULL,01/65] migration/qmp: Fix crash on setting tls-authz with null | expand

Commit Message

Juan Quintela Oct. 11, 2023, 9:21 a.m. UTC
From: Markus Armbruster <armbru@redhat.com>

qemu_rdma_resolve_host() and qemu_rdma_dest_init() iterate over
addresses to find one that works, holding onto the first Error from
qemu_rdma_broken_ipv6_kernel() for use when no address works.  Issues:

1. If @errp was &error_abort or &error_fatal, we'd terminate instead
   of trying the next address.  Can't actually happen, since no caller
   passes these arguments.

2. When @errp is a pointer to a variable containing NULL, and
   qemu_rdma_broken_ipv6_kernel() fails, the variable no longer
   contains NULL.  Subsequent iterations pass it again, violating
   Error usage rules.  Dangerous, as setting an error would then trip
   error_setv()'s assertion.  Works only because
   qemu_rdma_broken_ipv6_kernel() and the code following the loops
   carefully avoids setting a second error.

3. If qemu_rdma_broken_ipv6_kernel() fails, and then a later iteration
   finds a working address, @errp still holds the first error from
   qemu_rdma_broken_ipv6_kernel().  If we then run into another error,
   we report the qemu_rdma_broken_ipv6_kernel() failure instead.

4. If we don't run into another error, we leak the Error object.

Use a local error variable, and propagate to @errp.  This fixes 3. and
also cleans up 1 and partly 2.

Free this error when we have a working address.  This fixes 4.

Pass the local error variable to qemu_rdma_broken_ipv6_kernel() only
until it fails.  Pass null on any later iterations.  This cleans up
the remainder of 2.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20230928132019.2544702-34-armbru@redhat.com>
---
 migration/rdma.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/migration/rdma.c b/migration/rdma.c
index 4074509f06..3fb899f963 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -932,6 +932,7 @@  static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
  */
 static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
 {
+    Error *err = NULL;
     int ret;
     struct rdma_addrinfo *res;
     char port_str[16];
@@ -976,7 +977,10 @@  static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
         goto err_resolve_get_addr;
     }
 
+    /* Try all addresses, saving the first error in @err */
     for (e = res; e != NULL; e = e->ai_next) {
+        Error **local_errp = err ? NULL : &err;
+
         inet_ntop(e->ai_family,
             &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
         trace_qemu_rdma_resolve_host_trying(rdma->host, ip);
@@ -985,17 +989,21 @@  static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
                 RDMA_RESOLVE_TIMEOUT_MS);
         if (ret >= 0) {
             if (e->ai_family == AF_INET6) {
-                ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs, errp);
+                ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs,
+                                                   local_errp);
                 if (ret < 0) {
                     continue;
                 }
             }
+            error_free(err);
             goto route;
         }
     }
 
     rdma_freeaddrinfo(res);
-    if (errp && !*errp) {
+    if (err) {
+        error_propagate(errp, err);
+    } else {
         error_setg(errp, "RDMA ERROR: could not resolve address %s",
                    rdma->host);
     }
@@ -2707,6 +2715,7 @@  err_rdma_source_connect:
 
 static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
 {
+    Error *err = NULL;
     int ret, idx;
     struct rdma_cm_id *listen_id;
     char ip[40] = "unknown";
@@ -2765,7 +2774,11 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
         }
         goto err_dest_init_bind_addr;
     }
+
+    /* Try all addresses, saving the first error in @err */
     for (e = res; e != NULL; e = e->ai_next) {
+        Error **local_errp = err ? NULL : &err;
+
         inet_ntop(e->ai_family,
             &((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
         trace_qemu_rdma_dest_init_trying(rdma->host, ip);
@@ -2774,17 +2787,21 @@  static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
             continue;
         }
         if (e->ai_family == AF_INET6) {
-            ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs, errp);
+            ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs,
+                                               local_errp);
             if (ret < 0) {
                 continue;
             }
         }
+        error_free(err);
         break;
     }
 
     rdma_freeaddrinfo(res);
     if (!e) {
-        if (errp && !*errp) {
+        if (err) {
+            error_propagate(errp, err);
+        } else {
             error_setg(errp, "RDMA ERROR: Error: could not rdma_bind_addr!");
         }
         goto err_dest_init_bind_addr;