diff mbox series

[v2,1/2] xl: Keep monitoring suspended domain

Message ID 20241126171941.170897-2-jason.andryuk@amd.com (mailing list archive)
State New
Headers show
Series xl suspend/resume commands | expand

Commit Message

Jason Andryuk Nov. 26, 2024, 5:19 p.m. UTC
When a VM transitioned to LIBXL_SHUTDOWN_REASON_SUSPEND, the xl daemon
was exiting as 0 = DOMAIN_RESTART_NONE "No domain restart".
Later, when the VM actually shutdown, the missing xl daemon meant the
domain wasn't cleaned up properly.

Add a new DOMAIN_RESTART_SUSPENDED to handle the case.  The xl daemon
keeps running to react to future shutdown events.

The domain death event needs to be re-enabled to catch subsequent
events.  The libxl_evgen_domain_death is moved from death_list to
death_reported, and then it isn't found on subsequent iterations through
death_list.  We enable the new event before disabling the old event, to
keep the xenstore watch active.  If it is unregistered and
re-registered, it'll fire immediately for our suspended domain which
will end up continuously re-triggering.

Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
 tools/xl/xl.h           |  1 +
 tools/xl/xl_vmcontrol.c | 18 +++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/tools/xl/xl.h b/tools/xl/xl.h
index 9c86bb1d98..967d034cfe 100644
--- a/tools/xl/xl.h
+++ b/tools/xl/xl.h
@@ -301,6 +301,7 @@  typedef enum {
     DOMAIN_RESTART_NORMAL,       /* Domain should be restarted */
     DOMAIN_RESTART_RENAME,       /* Domain should be renamed and restarted */
     DOMAIN_RESTART_SOFT_RESET,   /* Soft reset should be performed */
+    DOMAIN_RESTART_SUSPENDED,    /* Domain suspended - keep looping */
 } domain_restart_type;
 
 extern void printf_info_sexp(int domid, libxl_domain_config *d_config, FILE *fh);
diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
index fa1a4420e3..c45d497c28 100644
--- a/tools/xl/xl_vmcontrol.c
+++ b/tools/xl/xl_vmcontrol.c
@@ -417,7 +417,7 @@  static domain_restart_type handle_domain_death(uint32_t *r_domid,
         break;
     case LIBXL_SHUTDOWN_REASON_SUSPEND:
         LOG("Domain has suspended.");
-        return 0;
+        return DOMAIN_RESTART_SUSPENDED;
     case LIBXL_SHUTDOWN_REASON_CRASH:
         action = d_config->on_crash;
         break;
@@ -1030,6 +1030,7 @@  start:
         }
     }
     while (1) {
+        libxl_evgen_domain_death *deathw2 = NULL;
         libxl_event *event;
         ret = domain_wait_event(domid, &event);
         if (ret) goto out;
@@ -1100,9 +1101,24 @@  start:
                 ret = 0;
                 goto out;
 
+            case DOMAIN_RESTART_SUSPENDED:
+                LOG("Continue waiting for domain %u", domid);
+                /*
+                 * Enable a new event before disabling the old.  This ensures
+                 * the xenstore watch remains active.  Otherwise it'll fire
+                 * immediately on re-registration and find our suspended domain.
+                 */
+                ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw2);
+                if (ret) goto out;
+                libxl_evdisable_domain_death(ctx, deathw);
+                deathw = deathw2;
+                deathw2 = NULL;
+                break;
+
             default:
                 abort();
             }
+            break;
 
         case LIBXL_EVENT_TYPE_DOMAIN_DEATH:
             LOG("Domain %u has been destroyed.", domid);