diff mbox series

[1/2] libxl: Fix guest kexec - skip cpuid policy

Message ID 20230121213908.6504-2-jandryuk@gmail.com (mailing list archive)
State Superseded
Headers show
Series tools: guest kexec fixes | expand

Commit Message

Jason Andryuk Jan. 21, 2023, 9:39 p.m. UTC
When a domain performs a kexec (soft reset), libxl__build_pre() is
called with the existing domid.  Calling libxl__cpuid_legacy() on the
existing domain fails because the CPUID policy has already been set, so
the guest isn't rebuilt and the kexec fails.

xc: error: Failed to set d1's policy (err leaf 0xffffffff, subleaf 0xffffffff, msr 0xffffffff) (17 = File exists): Internal error
libxl: error: libxl_cpuid.c:494:libxl__cpuid_legacy: Domain 1:Failed to apply CPUID policy: File exists
libxl: error: libxl_create.c:1641:domcreate_rebuild_done: Domain 1:cannot (re-)build domain: -3
libxl: error: libxl_xshelp.c:201:libxl__xs_read_mandatory: xenstore read failed: `/libxl/1/type': No such file or directory
libxl: warning: libxl_dom.c:49:libxl__domain_type: unable to get domain type for domid=1, assuming HVM

During a soft reset, skip calling libxl__cpuid_legacy() to avoid the
issue.  Before the commit named in the Fixes: tag below, the
libxl__cpuid_legacy() failure was ignored, so the kexec continued.

Fixes: 34990446ca91 ("libxl: don't ignore the return value from xc_cpuid_apply_policy")
Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
---
Probably a backport candidate since this has been broken for a while.
---
 tools/libs/light/libxl_create.c   | 4 ++--
 tools/libs/light/libxl_dom.c      | 5 +++--
 tools/libs/light/libxl_internal.h | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

Comments

Jürgen Groß Jan. 23, 2023, 8:31 a.m. UTC | #1
On 21.01.23 22:39, Jason Andryuk wrote:
> When a domain performs a kexec (soft reset), libxl__build_pre() is
> called with the existing domid.  Calling libxl__cpuid_legacy() on the
> existing domain fails since the cpuid policy has already been set, and
> the guest isn't rebuilt and doesn't kexec.
> 
> xc: error: Failed to set d1's policy (err leaf 0xffffffff, subleaf 0xffffffff, msr 0xffffffff) (17 = File exists): Internal error
> libxl: error: libxl_cpuid.c:494:libxl__cpuid_legacy: Domain 1:Failed to apply CPUID policy: File exists
> libxl: error: libxl_create.c:1641:domcreate_rebuild_done: Domain 1:cannot (re-)build domain: -3
> libxl: error: libxl_xshelp.c:201:libxl__xs_read_mandatory: xenstore read failed: `/libxl/1/type': No such file or directory
> libxl: warning: libxl_dom.c:49:libxl__domain_type: unable to get domain type for domid=1, assuming HVM
> 
> During a soft_reset, skip calling libxl__cpuid_legacy() to avoid the
> issue.  Before the fixes commit, the libxl__cpuid_legacy() failure would
> have been ignored, so kexec would continue.
> 
> Fixes: 34990446ca91 "libxl: don't ignore the return value from xc_cpuid_apply_policy"
> Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
> ---
> Probably a backport candidate since this has been broken for a while.
> ---
>   tools/libs/light/libxl_create.c   | 4 ++--
>   tools/libs/light/libxl_dom.c      | 5 +++--
>   tools/libs/light/libxl_internal.h | 2 +-
>   3 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
> index 5cddc3df79..587a515dff 100644
> --- a/tools/libs/light/libxl_create.c
> +++ b/tools/libs/light/libxl_create.c
> @@ -510,7 +510,7 @@ int libxl__domain_build(libxl__gc *gc,
>       struct timeval start_time;
>       int i, ret;
>   
> -    ret = libxl__build_pre(gc, domid, d_config, state);
> +    ret = libxl__build_pre(gc, domid, d_config, state, false);

Instead of adding a parameter to libxl__build_pre() I'd rather add another
bool "soft_reset" to libxl__domain_build_state.

This would be more similar to the libxl__domain_build_state->restore use
case.


Juergen
diff mbox series

Patch

diff --git a/tools/libs/light/libxl_create.c b/tools/libs/light/libxl_create.c
index 5cddc3df79..587a515dff 100644
--- a/tools/libs/light/libxl_create.c
+++ b/tools/libs/light/libxl_create.c
@@ -510,7 +510,7 @@  int libxl__domain_build(libxl__gc *gc,
     struct timeval start_time;
     int i, ret;
 
-    ret = libxl__build_pre(gc, domid, d_config, state);
+    ret = libxl__build_pre(gc, domid, d_config, state, false);
     if (ret)
         goto out;
 
@@ -1440,7 +1440,7 @@  static void domcreate_bootloader_done(libxl__egc *egc,
         goto out;
     }
 
-    rc = libxl__build_pre(gc, domid, d_config, state);
+    rc = libxl__build_pre(gc, domid, d_config, state, dcs->soft_reset);
     if (rc)
         goto out;
 
diff --git a/tools/libs/light/libxl_dom.c b/tools/libs/light/libxl_dom.c
index b454f988fb..7cebf5047f 100644
--- a/tools/libs/light/libxl_dom.c
+++ b/tools/libs/light/libxl_dom.c
@@ -241,7 +241,8 @@  static int numa_place_domain(libxl__gc *gc, uint32_t domid,
 }
 
 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
-              libxl_domain_config *d_config, libxl__domain_build_state *state)
+              libxl_domain_config *d_config, libxl__domain_build_state *state,
+              bool soft_reset)
 {
     libxl_domain_build_info *const info = &d_config->b_info;
     libxl_ctx *ctx = libxl__gc_owner(gc);
@@ -382,7 +383,7 @@  int libxl__build_pre(libxl__gc *gc, uint32_t domid,
     /* Construct a CPUID policy, but only for brand new domains.  Domains
      * being migrated-in/restored have CPUID handled during the
      * static_data_done() callback. */
-    if (!state->restore)
+    if (!state->restore && !soft_reset)
         rc = libxl__cpuid_legacy(ctx, domid, false, info);
 
 out:
diff --git a/tools/libs/light/libxl_internal.h b/tools/libs/light/libxl_internal.h
index 0dc8b8f210..f0af44b523 100644
--- a/tools/libs/light/libxl_internal.h
+++ b/tools/libs/light/libxl_internal.h
@@ -1418,7 +1418,7 @@  _hidden void libxl__domain_build_state_dispose(libxl__domain_build_state *s);
 
 _hidden int libxl__build_pre(libxl__gc *gc, uint32_t domid,
               libxl_domain_config * const d_config,
-              libxl__domain_build_state *state);
+              libxl__domain_build_state *state, bool soft_reset);
 _hidden int libxl__build_post(libxl__gc *gc, uint32_t domid,
                libxl_domain_build_info *info, libxl__domain_build_state *state,
                char **vms_ents, char **local_ents);