diff mbox series

[v4,2/3] spapr: use DEVICE_UNPLUG_ERROR to report unplug errors

Message ID 20210707003314.37110-3-danielhb413@gmail.com (mailing list archive)
State New, archived
Headers show
Series DEVICE_UNPLUG_ERROR QAPI event | expand

Commit Message

Daniel Henrique Barboza July 7, 2021, 12:33 a.m. UTC
Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
error path, signalling that the hotunplug process wasn't successful.
This allows us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
to signal this error to the management layer.

We also have another error path in spapr_memory_unplug_rollback() for
configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
in the hotunplug error path, but they will reconfigure them. Let's send
the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
case of older kernels.

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 hw/ppc/spapr.c     |  8 ++++++++
 hw/ppc/spapr_drc.c | 15 +++++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

Comments

Greg Kurz July 7, 2021, 9:28 a.m. UTC | #1
On Tue,  6 Jul 2021 21:33:13 -0300
Daniel Henrique Barboza <danielhb413@gmail.com> wrote:

> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> error path, signalling that the hotunplug process wasn't successful.
> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> to signal this error to the management layer.
> 
> We also have another error path in spapr_memory_unplug_rollback() for
> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> in the hotunplug error path, but it will reconfigure them. Let's send
> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> case of older kernels.
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr.c     |  8 ++++++++
>  hw/ppc/spapr_drc.c | 15 +++++++++------
>  2 files changed, 17 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 4dd90b75cc..fc071a1767 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -29,6 +29,7 @@
>  #include "qemu/datadir.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-events-machine.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/hostmem.h"
> @@ -3641,7 +3642,14 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>       */
>      qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>                                   "for device %s", dev->id);
> +
> +    /*
> +     * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
> +     * while the deprecation of MEM_UNPLUG_ERROR is
> +     * pending.
> +     */
>      qapi_event_send_mem_unplug_error(dev->id, qapi_error);
> +    qapi_event_send_device_unplug_error(dev->id, qapi_error);
>  }
>  
>  /* Callback to be called during DRC release. */
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index a2f2634601..45a7b1aa16 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -17,6 +17,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qom/object.h"
>  #include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "qemu/error-report.h"
>  #include "hw/ppc/spapr.h" /* for RTAS return codes */
> @@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>           * means that the kernel is refusing the removal.
>           */
>          if (drc->unplug_requested && drc->dev) {
> +            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
> +                                          "guest for device %s";
> +            g_autofree char *qapi_error = NULL;
> +
>              if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>                  spapr = SPAPR_MACHINE(qdev_get_machine());
>  
> @@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>              }
>  
>              drc->unplug_requested = false;
> -            error_report("Device hotunplug rejected by the guest "
> -                         "for device %s", drc->dev->id);
> +            error_report(qapi_error_fmt, drc->dev->id);
>  
> -            /*
> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> -             * it is implemented.
> -             */
> +            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> +            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
>          }
>  
>          return RTAS_OUT_SUCCESS; /* Nothing to do */
Markus Armbruster July 8, 2021, 1:08 p.m. UTC | #2
Daniel Henrique Barboza <danielhb413@gmail.com> writes:

> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> error path, signalling that the hotunplug process wasn't successful.
> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> to signal this error to the management layer.
>
> We also have another error path in spapr_memory_unplug_rollback() for
> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> in the hotunplug error path, but it will reconfigure them. Let's send
> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> case of older kernels.
>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  hw/ppc/spapr.c     |  8 ++++++++
>  hw/ppc/spapr_drc.c | 15 +++++++++------
>  2 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 4dd90b75cc..fc071a1767 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -29,6 +29,7 @@
>  #include "qemu/datadir.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-events-machine.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "sysemu/sysemu.h"
>  #include "sysemu/hostmem.h"
> @@ -3641,7 +3642,14 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>       */
>      qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>                                   "for device %s", dev->id);
> +
> +    /*
> +     * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
> +     * while the deprecation of MEM_UNPLUG_ERROR is
> +     * pending.
> +     */
>      qapi_event_send_mem_unplug_error(dev->id, qapi_error);
> +    qapi_event_send_device_unplug_error(dev->id, qapi_error);

Can dev->id be null here?

Such devices exist, but maybe not here.

If dev->id can be null, we pass null to
qapi_event_send_device_unplug_error(), which is not okay.  The output
visitor papers over this by replacing the null with "".  Let's not rely
on this misfeature.

>  }
>  
>  /* Callback to be called during DRC release. */
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index a2f2634601..45a7b1aa16 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -17,6 +17,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qom/object.h"
>  #include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-events-qdev.h"
>  #include "qapi/visitor.h"
>  #include "qemu/error-report.h"
>  #include "hw/ppc/spapr.h" /* for RTAS return codes */
> @@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>           * means that the kernel is refusing the removal.
>           */
>          if (drc->unplug_requested && drc->dev) {
> +            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
> +                                          "guest for device %s";
> +            g_autofree char *qapi_error = NULL;
> +
>              if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>                  spapr = SPAPR_MACHINE(qdev_get_machine());
>  
> @@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>              }
>  
>              drc->unplug_requested = false;
> -            error_report("Device hotunplug rejected by the guest "
> -                         "for device %s", drc->dev->id);
> +            error_report(qapi_error_fmt, drc->dev->id);
>  
> -            /*
> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> -             * it is implemented.
> -             */
> +            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> +            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
>          }
>  
>          return RTAS_OUT_SUCCESS; /* Nothing to do */

Likewise, only here we also pass it to g_strdup_printf() and possibly
vprintf() via error_report().  Null arguments to %s crash on some
systems.  The issue predates your patch.

I'm not sure reporting failed unplug with error_report() is a good idea,
but it's not your patch's idea.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4dd90b75cc..fc071a1767 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -29,6 +29,7 @@ 
 #include "qemu/datadir.h"
 #include "qapi/error.h"
 #include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/hostmem.h"
@@ -3641,7 +3642,14 @@  void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
      */
     qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
                                  "for device %s", dev->id);
+
+    /*
+     * Send both MEM_UNPLUG_ERROR and DEVICE_UNPLUG_ERROR
+     * while the deprecation of MEM_UNPLUG_ERROR is
+     * pending.
+     */
     qapi_event_send_mem_unplug_error(dev->id, qapi_error);
+    qapi_event_send_device_unplug_error(dev->id, qapi_error);
 }
 
 /* Callback to be called during DRC release. */
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a2f2634601..45a7b1aa16 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "qom/object.h"
 #include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -160,6 +162,10 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
          * means that the kernel is refusing the removal.
          */
         if (drc->unplug_requested && drc->dev) {
+            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
+                                          "guest for device %s";
+            g_autofree char *qapi_error = NULL;
+
             if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
                 spapr = SPAPR_MACHINE(qdev_get_machine());
 
@@ -167,13 +173,10 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
             }
 
             drc->unplug_requested = false;
-            error_report("Device hotunplug rejected by the guest "
-                         "for device %s", drc->dev->id);
+            error_report(qapi_error_fmt, drc->dev->id);
 
-            /*
-             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
-             * it is implemented.
-             */
+            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
+            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
         }
 
         return RTAS_OUT_SUCCESS; /* Nothing to do */