diff mbox series

[for-4.17,v3,13/15] tools/ocaml/xenstored: set uncaught exception handler

Message ID a370618057664382fe9e6e503e0dc0be8818e5ce.1667920496.git.edvin.torok@citrix.com (mailing list archive)
State New, archived
Headers show
Series OCaml fixes for Xen 4.17 | expand

Commit Message

Edwin Török Nov. 8, 2022, 3:34 p.m. UTC
Helps debug fatal errors during live update

Previously the report of an uncaught exception would've just gone to /dev/null, because:
* daemonize reopens stderr as /dev/null
* systemd redirects stderr to /dev/null too

Previously the only way to debug this was to manually run oxenstored with
--no-fork, but when you have a fatal error and oxenstored just
disappears you'd want to know why.
There has been at least one observed instance of a bug where oxenstored
just disappeared inexplicably (believed to have been caused by an out-of-memory exception).

Signed-off-by: Edwin Török <edvin.torok@citrix.com>
---
Reason for inclusion in 4.17:
- avoids losing crucial information during a fatal error (e.g. during
  live update)

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/logging.ml   | 33 ++++++++++++++++++++++++++++++
 tools/ocaml/xenstored/xenstored.ml |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

Comments

Christian Lindig Nov. 9, 2022, 2:07 p.m. UTC | #1
> On 8 Nov 2022, at 15:34, Edwin Török <edvin.torok@citrix.com> wrote:
> 
> Helps debug fatal errors during live update
> 
> Previously this would've just gone to /dev/null, because:
> * daemonize reopens stderr as /dev/null
> * systemd redirects stderr to /dev/null too
> 
> Previously the only way to debug this was to manually run oxenstored with
> --no-fork, but when you have a fatal error and oxenstored just
> disappears you'd want to know why.
> There has been at least one observed instance of a bug where oxenstored
> just disappeared inexplicably (it was believed due to an OOM exception).
> 
> Signed-off-by: Edwin Török <edvin.torok@citrix.com>
> ---
> Reason for inclusion in 4.17:
> - avoids losing crucial information during a fatal error (e.g. during
>  live update)

Acked-by: Christian Lindig <christian.lindig@citrix.com>

> +let print_flush msg =
> +  prerr_endline msg;
> +  flush stderr

The flush is not required because prerr_endline does it implicitly.
diff mbox series

Patch

diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml
index 021ebc465b..cced038c48 100644
--- a/tools/ocaml/xenstored/logging.ml
+++ b/tools/ocaml/xenstored/logging.ml
@@ -342,3 +342,36 @@  let xb_answer ~tid ~con ~ty data =
 let watch_not_fired ~con perms path =
   let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in
   access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info
+
+let print_flush msg =
+  prerr_endline msg;
+  flush stderr
+
+let msg_of exn bt =
+  Printf.sprintf "Fatal exception: %s\n%s\n" (Printexc.to_string exn)
+    (Printexc.raw_backtrace_to_string bt)
+
+let fallback_exception_handler exn bt =
+  (* stderr goes to /dev/null, so use the logger where possible,
+     but always print to stderr too, in case everything else fails,
+     e.g. this can be used to debug with --no-fork
+
+     this function should try not to raise exceptions, but if it does
+     the ocaml runtime should still print the exception, both the original,
+     and the one from this function, but to stderr this time
+  *)
+  let msg = msg_of exn bt in
+  print_flush msg;
+  (* See Printexc.set_uncaught_exception_handler, need to flush,
+     so has to call stop and flush *)
+  match !xenstored_logger with
+  | Some l -> error "xenstored-fallback" "%s" msg; l.stop ()
+  | None ->
+    (* Too early, no logger set yet.
+       We normally try to use the configured logger so we don't flood syslog
+       during development for example, or if the user has a file set
+    *)
+    try Syslog.log Syslog.Daemon Syslog.Err msg
+    with e ->
+      let bt = Printexc.get_raw_backtrace () in
+      print_flush @@ msg_of e bt
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 78177b116f..6828764f92 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -357,7 +357,8 @@  let tweak_gc () =
   Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
 
 
-let _ =
+let () =
+  Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
   let cf = do_argv in
   let pidfile =
     if Sys.file_exists (config_filename cf) then