diff mbox series

[1/2] python/machine: Add debug logging to key state changes

Message ID 20221024195355.860504-2-jsnow@redhat.com (mailing list archive)
State New, archived
Headers show
Series python: misc machine.py improvements | expand

Commit Message

John Snow Oct. 24, 2022, 7:53 p.m. UTC
When key decisions are made about the lifetime of the VM process being
managed, there's no log entry. Juxtaposed with the very verbose runstate
change logging of the QMP module, machine seems a bit too introverted
now.

Season the machine.py module with logging statements to taste to help
make a tastier soup.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 python/qemu/machine/machine.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

Comments

Daniel P. Berrangé Oct. 25, 2022, 8:24 a.m. UTC | #1
On Mon, Oct 24, 2022 at 03:53:54PM -0400, John Snow wrote:
> When key decisions are made about the lifetime of the VM process being
> managed, there's no log entry. Juxtaposed with the very verbose runstate
> change logging of the QMP module, machine seems a bit too introverted
> now.
> 
> Season the machine.py module with logging statements to taste to help
> make a tastier soup.
> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  python/qemu/machine/machine.py | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
> index 37191f433b2..c467f951d5d 100644
> --- a/python/qemu/machine/machine.py
> +++ b/python/qemu/machine/machine.py
> @@ -373,6 +373,7 @@ def _post_shutdown(self) -> None:
>          Called to cleanup the VM instance after the process has exited.
>          May also be called after a failed launch.
>          """
> +        LOG.debug("Cleaning up after VM process")
>          try:
>              self._close_qmp_connection()
>          except Exception as err:  # pylint: disable=broad-except
> @@ -497,6 +498,7 @@ def _early_cleanup(self) -> None:
>          # for QEMU to exit, while QEMU is waiting for the socket to
>          # become writable.
>          if self._console_socket is not None:
> +            LOG.debug("Closing console socket")
>              self._console_socket.close()
>              self._console_socket = None
>  
> @@ -507,6 +509,7 @@ def _hard_shutdown(self) -> None:
>          :raise subprocess.Timeout: When timeout is exceeds 60 seconds
>              waiting for the QEMU process to terminate.
>          """
> +        LOG.debug("Performing hard shutdown")
>          self._early_cleanup()
>          self._subp.kill()
>          self._subp.wait(timeout=60)
> @@ -523,6 +526,13 @@ def _soft_shutdown(self, timeout: Optional[int]) -> None:
>          :raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
>              the QEMU process to terminate.
>          """
> +        LOG.debug("Attempting graceful termination")
> +        if self._quit_issued:
> +            LOG.debug(
> +                "Anticipating QEMU termination due to prior 'quit' command, "
> +                "or explicit call to wait()"
> +            )
> +
>          self._early_cleanup()
>  
>          if self._qmp_connection:
> @@ -553,6 +563,10 @@ def _do_shutdown(self, timeout: Optional[int]) -> None:
>          try:
>              self._soft_shutdown(timeout)
>          except Exception as exc:
> +            if isinstance(exc, subprocess.TimeoutExpired):
> +                LOG.debug("Timed out waiting for QEMU process to exit")
> +            LOG.debug("Graceful shutdown failed, "
> +                      "falling back to hard shutdown")

If you add 'exc_info=True' when logging from inside an 'except'
clause, it captures the stack trace which is often critical for
debugging unexpected exceptions, especially when you're catching
the top level 'Exception' class instead of a very specialized
class.

>              self._hard_shutdown()
>              raise AbnormalShutdown("Could not perform graceful shutdown") \
>                  from exc
> @@ -575,6 +589,10 @@ def shutdown(self,
>          if not self._launched:
>              return
>  
> +        LOG.debug("Shutting down VM appliance; timeout=%s", timeout)
> +        if hard:
> +            LOG.debug("Caller requests immediate termination of QEMU process.")
> +
>          try:
>              if hard:
>                  self._user_killed = True
> -- 
> 2.37.3
> 
> 

With regards,
Daniel
John Snow Oct. 25, 2022, 9:48 p.m. UTC | #2
On Tue, Oct 25, 2022 at 4:24 AM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Mon, Oct 24, 2022 at 03:53:54PM -0400, John Snow wrote:
> > When key decisions are made about the lifetime of the VM process being
> > managed, there's no log entry. Juxtaposed with the very verbose runstate
> > change logging of the QMP module, machine seems a bit too introverted
> > now.
> >
> > Season the machine.py module with logging statements to taste to help
> > make a tastier soup.
> >
> > Signed-off-by: John Snow <jsnow@redhat.com>
> > ---
> >  python/qemu/machine/machine.py | 18 ++++++++++++++++++
> >  1 file changed, 18 insertions(+)
> >
> > diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
> > index 37191f433b2..c467f951d5d 100644
> > --- a/python/qemu/machine/machine.py
> > +++ b/python/qemu/machine/machine.py
> > @@ -373,6 +373,7 @@ def _post_shutdown(self) -> None:
> >          Called to cleanup the VM instance after the process has exited.
> >          May also be called after a failed launch.
> >          """
> > +        LOG.debug("Cleaning up after VM process")
> >          try:
> >              self._close_qmp_connection()
> >          except Exception as err:  # pylint: disable=broad-except
> > @@ -497,6 +498,7 @@ def _early_cleanup(self) -> None:
> >          # for QEMU to exit, while QEMU is waiting for the socket to
> >          # become writable.
> >          if self._console_socket is not None:
> > +            LOG.debug("Closing console socket")
> >              self._console_socket.close()
> >              self._console_socket = None
> >
> > @@ -507,6 +509,7 @@ def _hard_shutdown(self) -> None:
> >          :raise subprocess.Timeout: When timeout is exceeds 60 seconds
> >              waiting for the QEMU process to terminate.
> >          """
> > +        LOG.debug("Performing hard shutdown")
> >          self._early_cleanup()
> >          self._subp.kill()
> >          self._subp.wait(timeout=60)
> > @@ -523,6 +526,13 @@ def _soft_shutdown(self, timeout: Optional[int]) -> None:
> >          :raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
> >              the QEMU process to terminate.
> >          """
> > +        LOG.debug("Attempting graceful termination")
> > +        if self._quit_issued:
> > +            LOG.debug(
> > +                "Anticipating QEMU termination due to prior 'quit' command, "
> > +                "or explicit call to wait()"
> > +            )
> > +
> >          self._early_cleanup()
> >
> >          if self._qmp_connection:
> > @@ -553,6 +563,10 @@ def _do_shutdown(self, timeout: Optional[int]) -> None:
> >          try:
> >              self._soft_shutdown(timeout)
> >          except Exception as exc:
> > +            if isinstance(exc, subprocess.TimeoutExpired):
> > +                LOG.debug("Timed out waiting for QEMU process to exit")
> > +            LOG.debug("Graceful shutdown failed, "
> > +                      "falling back to hard shutdown")
>
> If you add 'exc_info=True' when logging from inside an 'except'
> clause, it captures the stack trace which is often critical for
> > debugging unexpected exceptions, especially when you're catching
> the top level 'Exception' class instead of a very specialized
> class.
>

Sure. If the exception goes unhandled, ultimately, we'll see that
stack trace twice. On the other hand, if someone handles this exception
and you still want to see it in the debug log somewhere,
it's probably a fair trade-off.

I'll touch it up while we're here.

--js
diff mbox series

Patch

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 37191f433b2..c467f951d5d 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -373,6 +373,7 @@  def _post_shutdown(self) -> None:
         Called to cleanup the VM instance after the process has exited.
         May also be called after a failed launch.
         """
+        LOG.debug("Cleaning up after VM process")
         try:
             self._close_qmp_connection()
         except Exception as err:  # pylint: disable=broad-except
@@ -497,6 +498,7 @@  def _early_cleanup(self) -> None:
         # for QEMU to exit, while QEMU is waiting for the socket to
         # become writable.
         if self._console_socket is not None:
+            LOG.debug("Closing console socket")
             self._console_socket.close()
             self._console_socket = None
 
@@ -507,6 +509,7 @@  def _hard_shutdown(self) -> None:
         :raise subprocess.Timeout: When timeout is exceeds 60 seconds
             waiting for the QEMU process to terminate.
         """
+        LOG.debug("Performing hard shutdown")
         self._early_cleanup()
         self._subp.kill()
         self._subp.wait(timeout=60)
@@ -523,6 +526,13 @@  def _soft_shutdown(self, timeout: Optional[int]) -> None:
         :raise subprocess.TimeoutExpired: When timeout is exceeded waiting for
             the QEMU process to terminate.
         """
+        LOG.debug("Attempting graceful termination")
+        if self._quit_issued:
+            LOG.debug(
+                "Anticipating QEMU termination due to prior 'quit' command, "
+                "or explicit call to wait()"
+            )
+
         self._early_cleanup()
 
         if self._qmp_connection:
@@ -553,6 +563,10 @@  def _do_shutdown(self, timeout: Optional[int]) -> None:
         try:
             self._soft_shutdown(timeout)
         except Exception as exc:
+            if isinstance(exc, subprocess.TimeoutExpired):
+                LOG.debug("Timed out waiting for QEMU process to exit")
+            LOG.debug("Graceful shutdown failed, "
+                      "falling back to hard shutdown")
             self._hard_shutdown()
             raise AbnormalShutdown("Could not perform graceful shutdown") \
                 from exc
@@ -575,6 +589,10 @@  def shutdown(self,
         if not self._launched:
             return
 
+        LOG.debug("Shutting down VM appliance; timeout=%s", timeout)
+        if hard:
+            LOG.debug("Caller requests immediate termination of QEMU process.")
+
         try:
             if hard:
                 self._user_killed = True