diff mbox series

[v2,2/2] spapr: use DEVICE_UNPLUG_ERROR to report unplug errors

Message ID 20210604200353.1206897-3-danielhb413@gmail.com (mailing list archive)
State New, archived
Headers show
Series DEVICE_UNPLUG_ERROR QAPI event | expand

Commit Message

Daniel Henrique Barboza June 4, 2021, 8:03 p.m. UTC
Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
error path, signalling that the hotunplug process wasn't successful.
This allows us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
to signal this error to the management layer.

We also have another error path in spapr_memory_unplug_rollback() for
configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
in the hotunplug error path, but they will reconfigure them.  Let's send
the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
case of older kernels.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 hw/ppc/spapr.c     |  2 +-
 hw/ppc/spapr_drc.c | 15 +++++++++------
 2 files changed, 10 insertions(+), 7 deletions(-)

Comments

David Gibson June 7, 2021, 2:24 a.m. UTC | #1
On Fri, Jun 04, 2021 at 05:03:53PM -0300, Daniel Henrique Barboza wrote:
> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> error path, signalling that the hotunplug process wasn't successful.
> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> to signal this error to the management layer.
> 
> We also have another error path in spapr_memory_unplug_rollback() for
> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> in the hotunplug error path, but it will reconfigure them.  Let's send
> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> case of older kernels.
> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

> ---
>  hw/ppc/spapr.c     |  2 +-
>  hw/ppc/spapr_drc.c | 15 +++++++++------
>  2 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index c23bcc4490..29aa2f467d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -3639,7 +3639,7 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>       */
>      qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>                                   "for device %s", dev->id);
> -    qapi_event_send_mem_unplug_error(dev->id, qapi_error);
> +    qapi_event_send_device_unplug_error(dev->id, qapi_error);
>  }
>  
>  /* Callback to be called during DRC release. */
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index a2f2634601..0e1a8733bc 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -17,6 +17,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qom/object.h"
>  #include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-events-machine.h"
>  #include "qapi/visitor.h"
>  #include "qemu/error-report.h"
>  #include "hw/ppc/spapr.h" /* for RTAS return codes */
> @@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>           * means that the kernel is refusing the removal.
>           */
>          if (drc->unplug_requested && drc->dev) {
> +            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
> +                                          "guest for device %s";
> +            g_autofree char *qapi_error = NULL;
> +
>              if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>                  spapr = SPAPR_MACHINE(qdev_get_machine());
>  
> @@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>              }
>  
>              drc->unplug_requested = false;
> -            error_report("Device hotunplug rejected by the guest "
> -                         "for device %s", drc->dev->id);
> +            error_report(qapi_error_fmt, drc->dev->id);
>  
> -            /*
> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> -             * it is implemented.
> -             */
> +            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> +            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
>          }
>  
>          return RTAS_OUT_SUCCESS; /* Nothing to do */
Markus Armbruster June 11, 2021, 12:18 p.m. UTC | #2
Daniel Henrique Barboza <danielhb413@gmail.com> writes:

> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
> error path, signalling that the hotunplug process wasn't successful.
> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
> to signal this error to the management layer.
>
> We also have another error path in spapr_memory_unplug_rollback() for
> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
> in the hotunplug error path, but it will reconfigure them.  Let's send
> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
> case of older kernels.
>
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  hw/ppc/spapr.c     |  2 +-
>  hw/ppc/spapr_drc.c | 15 +++++++++------
>  2 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index c23bcc4490..29aa2f467d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -3639,7 +3639,7 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>       */
>      qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>                                   "for device %s", dev->id);
> -    qapi_event_send_mem_unplug_error(dev->id, qapi_error);
> +    qapi_event_send_device_unplug_error(dev->id, qapi_error);

Incompatible change: we now emit DEVICE_UNPLUG_ERROR instead of
MEM_UNPLUG_ERROR.  Intentional?

If yes, we need a release note.

To avoid the incompatibility, we can emit both, and deprecate
MEM_UNPLUG_ERROR.

What about the MEM_UNPLUG_ERROR in acpi_memory_hotplug_write()?

>  }
>  
>  /* Callback to be called during DRC release. */
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index a2f2634601..0e1a8733bc 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -17,6 +17,8 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "qom/object.h"
>  #include "migration/vmstate.h"
> +#include "qapi/error.h"
> +#include "qapi/qapi-events-machine.h"
>  #include "qapi/visitor.h"
>  #include "qemu/error-report.h"
>  #include "hw/ppc/spapr.h" /* for RTAS return codes */
> @@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>           * means that the kernel is refusing the removal.
>           */
>          if (drc->unplug_requested && drc->dev) {
> +            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
> +                                          "guest for device %s";
> +            g_autofree char *qapi_error = NULL;
> +
>              if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>                  spapr = SPAPR_MACHINE(qdev_get_machine());
>  
> @@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>              }
>  
>              drc->unplug_requested = false;
> -            error_report("Device hotunplug rejected by the guest "
> -                         "for device %s", drc->dev->id);
> +            error_report(qapi_error_fmt, drc->dev->id);
>  
> -            /*
> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
> -             * it is implemented.
> -             */
> +            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
> +            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
>          }
>  
>          return RTAS_OUT_SUCCESS; /* Nothing to do */

Reporting both to stderr and QMP is odd.  Can you describe a use case
where the report to stderr is useful?
Daniel Henrique Barboza June 16, 2021, 4:58 p.m. UTC | #3
On 6/11/21 9:18 AM, Markus Armbruster wrote:
> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
> 
>> Linux Kernel 5.12 is now unisolating CPU DRCs in the device_removal
>> error path, signalling that the hotunplug process wasn't successful.
>> This allow us to send a DEVICE_UNPLUG_ERROR in drc_unisolate_logical()
>> to signal this error to the management layer.
>>
>> We also have another error path in spapr_memory_unplug_rollback() for
>> configured LMB DRCs. Kernels older than 5.13 will not unisolate the LMBs
>> in the hotunplug error path, but it will reconfigure them.  Let's send
>> the DEVICE_UNPLUG_ERROR event in that code path as well to cover the
>> case of older kernels.
>>
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> ---
>>   hw/ppc/spapr.c     |  2 +-
>>   hw/ppc/spapr_drc.c | 15 +++++++++------
>>   2 files changed, 10 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index c23bcc4490..29aa2f467d 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -3639,7 +3639,7 @@ void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
>>        */
>>       qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
>>                                    "for device %s", dev->id);
>> -    qapi_event_send_mem_unplug_error(dev->id, qapi_error);
>> +    qapi_event_send_device_unplug_error(dev->id, qapi_error);
> 
> Incompatible change: we now emit DEVICE_UNPLUG_ERROR instead of
> MEM_UNPLUG_ERROR.  Intentional?
> 
> If yes, we need a release note.
> 
> To avoid the incompatible, we can emit both, and deprecate
> MEM_UNPLUG_ERROR.
> 
> What about the MEM_UNPLUG_ERROR in acpi_memory_hotplug_write()?

I'll emit DEVICE_UNPLUG_ERROR together with all MEM_UNPLUG_ERROR instances.
Then we can deprecate MEM_UNPLUG_ERROR.

By the way, how do I mark MEM_UNPLUG_ERROR as deprecated? I see examples
of command line options being documented as deprecated in
docs/system/deprecated.rst and some deprecated QOM/QDEV properties are
marked as deprecated directly in their .json files, but I didn't find
any case where a whole event is deprecated. Would something like this be
adequate?


$ git diff
diff --git a/qapi/machine.json b/qapi/machine.json
index 58a9c86b36..ce3d873c64 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1261,6 +1261,10 @@
  #
  # @msg: Informative message
  #
+#
+# @deprecated: Starting in 6.1 this event has been replaced by
+#              DEVICE_UNPLUG_ERROR.
+#
  # Since: 2.4
  #
  # Example:



Thanks,


Daniel


> 
>>   }
>>   
>>   /* Callback to be called during DRC release. */
>> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
>> index a2f2634601..0e1a8733bc 100644
>> --- a/hw/ppc/spapr_drc.c
>> +++ b/hw/ppc/spapr_drc.c
>> @@ -17,6 +17,8 @@
>>   #include "hw/ppc/spapr_drc.h"
>>   #include "qom/object.h"
>>   #include "migration/vmstate.h"
>> +#include "qapi/error.h"
>> +#include "qapi/qapi-events-machine.h"
>>   #include "qapi/visitor.h"
>>   #include "qemu/error-report.h"
>>   #include "hw/ppc/spapr.h" /* for RTAS return codes */
>> @@ -160,6 +162,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>>            * means that the kernel is refusing the removal.
>>            */
>>           if (drc->unplug_requested && drc->dev) {
>> +            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
>> +                                          "guest for device %s";
>> +            g_autofree char *qapi_error = NULL;
>> +
>>               if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
>>                   spapr = SPAPR_MACHINE(qdev_get_machine());
>>   
>> @@ -167,13 +173,10 @@ static uint32_t drc_unisolate_logical(SpaprDrc *drc)
>>               }
>>   
>>               drc->unplug_requested = false;
>> -            error_report("Device hotunplug rejected by the guest "
>> -                         "for device %s", drc->dev->id);
>> +            error_report(qapi_error_fmt, drc->dev->id);
>>   
>> -            /*
>> -             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
>> -             * it is implemented.
>> -             */
>> +            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
>> +            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
>>           }
>>   
>>           return RTAS_OUT_SUCCESS; /* Nothing to do */
> 
> Reporting both to stderr and QMP is odd.  Can you describe a use case
> where the report to stderr is useful?
>
Eric Blake June 16, 2021, 5:58 p.m. UTC | #4
On Wed, Jun 16, 2021 at 01:58:04PM -0300, Daniel Henrique Barboza wrote:
> > Incompatible change: we now emit DEVICE_UNPLUG_ERROR instead of
> > MEM_UNPLUG_ERROR.  Intentional?
> > 
> > If yes, we need a release note.
> > 
> > To avoid the incompatible, we can emit both, and deprecate
> > MEM_UNPLUG_ERROR.
> > 
> > What about the MEM_UNPLUG_ERROR in acpi_memory_hotplug_write()?
> 
> I'll emit DEVICE_UNPLUG_ERROR together with all MEM_UNPLUG_ERROR instances.
> Then we can deprecate MEM_UNPLUG_ERROR.
> 
> By the way, how do I mark MEM_UNPLUG_ERROR as deprecated? I see examples
> of command line options being documented as deprecated in
> docs/system/deprecated.rst and some deprecated QOM/QDEV properties are
> marked as deprecated directly in their .json files, but I didn't find
> any case where a whole event is deprecated. Would something like this be
> adequate?

Almost.  That documents the deprecation for readers, but you also need
to mark it for viewing by machine code...

> 
> 
> $ git diff
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 58a9c86b36..ce3d873c64 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1261,6 +1261,10 @@
>  #
>  # @msg: Informative message
>  #
> +#
> +# @deprecated: Starting in 6.1 this event has been replaced by
> +#              DEVICE_UNPLUG_ERROR.
> +#
>  # Since: 2.4
>  #
>  # Example:

...do that by adding 'features':['deprecated'] to the QAPI event
definition.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c23bcc4490..29aa2f467d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3639,7 +3639,7 @@  void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
      */
     qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
                                  "for device %s", dev->id);
-    qapi_event_send_mem_unplug_error(dev->id, qapi_error);
+    qapi_event_send_device_unplug_error(dev->id, qapi_error);
 }
 
 /* Callback to be called during DRC release. */
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index a2f2634601..0e1a8733bc 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -17,6 +17,8 @@ 
 #include "hw/ppc/spapr_drc.h"
 #include "qom/object.h"
 #include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-machine.h"
 #include "qapi/visitor.h"
 #include "qemu/error-report.h"
 #include "hw/ppc/spapr.h" /* for RTAS return codes */
@@ -160,6 +162,10 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
          * means that the kernel is refusing the removal.
          */
         if (drc->unplug_requested && drc->dev) {
+            const char qapi_error_fmt[] = "Device hotunplug rejected by the "
+                                          "guest for device %s";
+            g_autofree char *qapi_error = NULL;
+
             if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
                 spapr = SPAPR_MACHINE(qdev_get_machine());
 
@@ -167,13 +173,10 @@  static uint32_t drc_unisolate_logical(SpaprDrc *drc)
             }
 
             drc->unplug_requested = false;
-            error_report("Device hotunplug rejected by the guest "
-                         "for device %s", drc->dev->id);
+            error_report(qapi_error_fmt, drc->dev->id);
 
-            /*
-             * TODO: send a QAPI DEVICE_UNPLUG_ERROR event when
-             * it is implemented.
-             */
+            qapi_error = g_strdup_printf(qapi_error_fmt, drc->dev->id);
+            qapi_event_send_device_unplug_error(drc->dev->id, qapi_error);
         }
 
         return RTAS_OUT_SUCCESS; /* Nothing to do */