diff mbox series

[v12,26/27] tools/xenstore: handle dying domains in live update

Message ID 20210115083000.14186-27-jgross@suse.com (mailing list archive)
State New, archived
Headers show
Series tools/xenstore: support live update for xenstored | expand

Commit Message

Jürgen Groß Jan. 15, 2021, 8:29 a.m. UTC
From: Julien Grall <jgrall@amazon.com>

A domain may be in the process of dying while Xenstore is being live
updated, so the case of not being able to map its ring page or to connect
to its event channel must be handled gracefully.

Signed-off-by: Julien Grall <jgrall@amazon.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
V4:
- new patch (Julien, I hope adding the Sob: is okay?)

V10:
- removed "XXX..." comment (Julien Grall)
---
 tools/xenstore/xenstored_control.c |  7 +++++++
 tools/xenstore/xenstored_domain.c  | 25 +++++++++++++++++--------
 tools/xenstore/xenstored_domain.h  |  2 ++
 3 files changed, 26 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/tools/xenstore/xenstored_control.c b/tools/xenstore/xenstored_control.c
index b8195eed41..3c212cc4d4 100644
--- a/tools/xenstore/xenstored_control.c
+++ b/tools/xenstore/xenstored_control.c
@@ -561,6 +561,13 @@  void lu_read_state(void)
 	lu_close_dump_state(&state);
 
 	talloc_free(ctx);
+
+	/*
+	 * We may have missed the VIRQ_DOM_EXC notification and a domain may
+	 * have died while we were live-updating. So check that all the domains
+	 * are still alive.
+	 */
+	check_domains(true);
 }
 
 static const char *lu_activate_binary(const void *ctx)
diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c
index 6934f1bc89..cbeb2a309c 100644
--- a/tools/xenstore/xenstored_domain.c
+++ b/tools/xenstore/xenstored_domain.c
@@ -220,7 +220,7 @@  static bool get_domain_info(unsigned int domid, xc_dominfo_t *dominfo)
 	       dominfo->domid == domid;
 }
 
-static void domain_cleanup(void)
+void check_domains(bool restore)
 {
 	xc_dominfo_t dominfo;
 	struct domain *domain;
@@ -244,7 +244,14 @@  static void domain_cleanup(void)
 				domain->shutdown = true;
 				notify = 1;
 			}
-			if (!dominfo.dying)
+			/*
+			 * On restore, we may have been unable to remap the
+			 * interface and the port. As we don't know whether
+			 * this was because of a dying domain, we need to
+			 * check if the interface and port are still valid.
+			 */
+			if (!dominfo.dying && domain->port &&
+			    domain->interface)
 				continue;
 		}
 		if (domain->conn) {
@@ -270,7 +277,7 @@  void handle_event(void)
 		barf_perror("Failed to read from event fd");
 
 	if (port == virq_port)
-		domain_cleanup();
+		check_domains(false);
 
 	if (xenevtchn_unmask(xce_handle, port) == -1)
 		barf_perror("Failed to write to event fd");
@@ -442,14 +449,16 @@  static struct domain *introduce_domain(const void *ctx,
 	if (!domain->introduced) {
 		interface = is_master_domain ? xenbus_map()
 					     : map_interface(domid);
-		if (!interface)
+		if (!interface && !restore)
 			return NULL;
 		if (new_domain(domain, port, restore)) {
 			rc = errno;
-			if (is_master_domain)
-				unmap_xenbus(interface);
-			else
-				unmap_interface(interface);
+			if (interface) {
+				if (is_master_domain)
+					unmap_xenbus(interface);
+				else
+					unmap_interface(interface);
+			}
 			errno = rc;
 			return NULL;
 		}
diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h
index 8f3b4e0f8b..1cc1c03ed8 100644
--- a/tools/xenstore/xenstored_domain.h
+++ b/tools/xenstore/xenstored_domain.h
@@ -21,6 +21,8 @@ 
 
 void handle_event(void);
 
+void check_domains(bool restore);
+
 /* domid, mfn, eventchn, path */
 int do_introduce(struct connection *conn, struct buffered_data *in);