Helps debug fatal errors during live update

Previously the details of a fatal exception would've just gone to /dev/null, because:
* daemonize reopens stderr as /dev/null
* systemd redirects stderr to /dev/null too

Previously the only way to debug this was to manually run oxenstored with
--no-fork, but when you have a fatal error and oxenstored just
disappears you'd want to know why.
There has been at least one observed instance of a bug where oxenstored
just disappeared inexplicably (believed to be due to an OOM exception).

Signed-off-by: Edwin Török <edvin.to...@citrix.com>
---
Reason for inclusion in 4.17:
- avoids losing crucial information during a fatal error (e.g. during
  live update)

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/logging.ml   | 33 ++++++++++++++++++++++++++++++
 tools/ocaml/xenstored/xenstored.ml |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml
index 021ebc465b..cced038c48 100644
--- a/tools/ocaml/xenstored/logging.ml
+++ b/tools/ocaml/xenstored/logging.ml
@@ -342,3 +342,36 @@ let xb_answer ~tid ~con ~ty data =
 let watch_not_fired ~con perms path =
   let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in
   access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info
+
+let print_flush msg =
+  (* prerr_endline already flushes stderr, no explicit flush is needed *)
+  prerr_endline msg
+
+let msg_of exn bt = (* one printable message: exception, then backtrace *)
+  let trace = Printexc.raw_backtrace_to_string bt in
+  Printf.sprintf "Fatal exception: %s\n%s\n" (Printexc.to_string exn) trace
+
+let fallback_exception_handler exn bt =
+  (* Last-resort handler for an uncaught exception [exn] with backtrace [bt].
+     When daemonized, stderr goes to /dev/null, so report through the logger
+     where possible; still print to stderr first in case everything else
+     fails, and so that the message is visible when run with --no-fork.
+
+     This function should try not to raise exceptions. If it does, the OCaml
+     runtime should still print both the original and the new one, to stderr.
+  *)
+  let msg = msg_of exn bt in
+  print_flush msg;
+  (* See Printexc.set_uncaught_exception_handler: the handler has to flush
+     its own output, hence the logger is stopped (presumably flushing it) *)
+  match !xenstored_logger with
+  | Some l -> error "xenstored-fallback" "%s" msg; l.stop ()
+  | None ->
+    (* Too early: no logger has been set yet, so fall back to syslog.
+       The configured logger is preferred otherwise, so that we do not flood
+       syslog (e.g. during development) and honour a user-configured log file.
+    *)
+    try Syslog.log Syslog.Daemon Syslog.Err msg
+    with e ->
+      let bt = Printexc.get_raw_backtrace () in
+      print_flush @@ msg_of e bt
diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml
index 78177b116f..6828764f92 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -357,7 +357,8 @@ let tweak_gc () =
   Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
 
 
-let _ =
+let () =
+  Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
   let cf = do_argv in
   let pidfile =
     if Sys.file_exists (config_filename cf) then
-- 
2.34.1


Reply via email to