diff mbox series

[3/3] spapr: Migrate SpaprDrc::unplug_requested

Message ID 158076938222.2118610.14456984179352959929.stgit@bahia.lan (mailing list archive)
State New, archived
Headers show
Series spapr: Fix device unplug vs CAS or migration | expand

Commit Message

Greg Kurz Feb. 3, 2020, 10:36 p.m. UTC
Hot unplugging a device is an asynchronous operation. If the guest is
migrated after the event was sent but before it could release the
device with RTAS, the destination QEMU doesn't know about the pending
unplug operation and doesn't actually remove the device when the guest
finally releases it. The device thus stays attached on the destination
even though the guest has released it.

Migrate SpaprDrc::unplug_requested to fix the inconsistency. This is
done with a subsection that is only sent if an unplug request is
pending. This preserves migration with older guests in the case of a
pending hotplug request. Migration will fail if the destination can't
handle the subsection, but this is better than ending up with an
inconsistency.

Signed-off-by: Greg Kurz <groug@kaod.org>
---
 hw/ppc/spapr_drc.c |   27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

Comments

David Gibson Feb. 14, 2020, 2:29 a.m. UTC | #1
On Mon, Feb 03, 2020 at 11:36:22PM +0100, Greg Kurz wrote:
> Hot unplugging a device is an asynchronous operation. If the guest is
> migrated after the event was sent but before it could release the
> device with RTAS, the destination QEMU doesn't know about the pending
> unplug operation and doesn't actually remove the device when the guest
> finally releases it. The device
> 
> Migrate SpaprDrc::unplug_requested to fix the inconsistency. This is
> done with a subsection that is only sent if an unplug request is
> pending. This allows to preserve migration with older guests in the
> case of a pending hotplug request. This will cause migration to fail
> if the destination can't handle the subsection, but this is better
> than ending with an inconsistency.
> 
> Signed-off-by: Greg Kurz <groug@kaod.org>
> ---
>  hw/ppc/spapr_drc.c |   27 +++++++++++++++++++++++++--
>  1 file changed, 25 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index d512ac6e1e7f..6f5cab70fc6b 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -455,6 +455,22 @@ void spapr_drc_reset(SpaprDrc *drc)
>      }
>  }
>  
> +static bool spapr_drc_unplug_requested_needed(void *opaque)
> +{
> +    return spapr_drc_unplug_requested(opaque);
> +}
> +
> +static const VMStateDescription vmstate_spapr_drc_unplug_requested = {
> +    .name = "spapr_drc/unplug_requested",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = spapr_drc_unplug_requested_needed,
> +    .fields  = (VMStateField []) {
> +        VMSTATE_BOOL(unplug_requested, SpaprDrc),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static bool spapr_drc_needed(void *opaque)
>  {
>      SpaprDrc *drc = (SpaprDrc *)opaque;
> @@ -467,8 +483,11 @@ static bool spapr_drc_needed(void *opaque)
>      /*
>       * We need to migrate the state if it's not equal to the expected
>       * long-term state, which is the same as the coldplugged initial
> -     * state */
> -    return !spapr_drc_device_ready(drc);
> +     * state, or if an unplug request is pending.
> +     */
> +    return
> +        spapr_drc_unplug_requested_needed(drc) ||
> +        !spapr_drc_device_ready(drc);

Hrm.  You start the series by splitting spapr_drc_device_ready() from
spapr_drc_needed().  But at this point, I'm pretty sure all the callers
of spapr_drc_device_ready() now wrap it in equivalent logic, so they
might as well be one function again.  That seems pretty roundabout.

I don't think the rationale for not using the drc_ready function from
the CAS path really makes sense.  It's not just an accident that those
use the same logic - in both cases what we're testing is "Is the DRC
in a state other than that of a default cold-plugged device?".

Changing the name might be sensible, but I still think we want a
common function for the two cases.

>  }
>  
>  static const VMStateDescription vmstate_spapr_drc = {
> @@ -479,6 +498,10 @@ static const VMStateDescription vmstate_spapr_drc = {
>      .fields  = (VMStateField []) {
>          VMSTATE_UINT32(state, SpaprDrc),
>          VMSTATE_END_OF_LIST()
> +    },
> +    .subsections = (const VMStateDescription * []) {
> +        &vmstate_spapr_drc_unplug_requested,
> +        NULL
>      }
>  };
>  
>
Greg Kurz Feb. 14, 2020, 11:48 a.m. UTC | #2
On Fri, 14 Feb 2020 13:29:00 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Mon, Feb 03, 2020 at 11:36:22PM +0100, Greg Kurz wrote:
> > Hot unplugging a device is an asynchronous operation. If the guest is
> > migrated after the event was sent but before it could release the
> > device with RTAS, the destination QEMU doesn't know about the pending
> > unplug operation and doesn't actually remove the device when the guest
> > finally releases it. The device
> > 
> > Migrate SpaprDrc::unplug_requested to fix the inconsistency. This is
> > done with a subsection that is only sent if an unplug request is
> > pending. This allows to preserve migration with older guests in the
> > case of a pending hotplug request. This will cause migration to fail
> > if the destination can't handle the subsection, but this is better
> > than ending with an inconsistency.
> > 
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > ---
> >  hw/ppc/spapr_drc.c |   27 +++++++++++++++++++++++++--
> >  1 file changed, 25 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> > index d512ac6e1e7f..6f5cab70fc6b 100644
> > --- a/hw/ppc/spapr_drc.c
> > +++ b/hw/ppc/spapr_drc.c
> > @@ -455,6 +455,22 @@ void spapr_drc_reset(SpaprDrc *drc)
> >      }
> >  }
> >  
> > +static bool spapr_drc_unplug_requested_needed(void *opaque)
> > +{
> > +    return spapr_drc_unplug_requested(opaque);
> > +}
> > +
> > +static const VMStateDescription vmstate_spapr_drc_unplug_requested = {
> > +    .name = "spapr_drc/unplug_requested",
> > +    .version_id = 1,
> > +    .minimum_version_id = 1,
> > +    .needed = spapr_drc_unplug_requested_needed,
> > +    .fields  = (VMStateField []) {
> > +        VMSTATE_BOOL(unplug_requested, SpaprDrc),
> > +        VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >  static bool spapr_drc_needed(void *opaque)
> >  {
> >      SpaprDrc *drc = (SpaprDrc *)opaque;
> > @@ -467,8 +483,11 @@ static bool spapr_drc_needed(void *opaque)
> >      /*
> >       * We need to migrate the state if it's not equal to the expected
> >       * long-term state, which is the same as the coldplugged initial
> > -     * state */
> > -    return !spapr_drc_device_ready(drc);
> > +     * state, or if an unplug request is pending.
> > +     */
> > +    return
> > +        spapr_drc_unplug_requested_needed(drc) ||
> > +        !spapr_drc_device_ready(drc);
> 
> Hrm.  You start the series by splitting spapr_drc_device_ready() from
> spapr_drc_needed().  But at this point, I'm pretty sure you've now got
> all the callers of spapr_drc_device_ready() doing equivalent logic
> about them, so they might as well be one function again.  That seems
> pretty roundabout.
> 

Yeah... I did the split because an earlier draft of this series had
a separate path at some point for the plug and unplug cases... but
I agree these should be reunited.

> I don't think the rationale for not using the drc_ready function from
> the CAS path really makes sense.  It's not just an accident that those
> use the same logic - in both cases what we're testing is "Is the DRC
> in a state other than that of a default cold-plugged device?".
>

"Is the DRC in a state other than that of a default cold-plugged device
or is an unplug request pending?" since the DRC of the device to be
unplugged only transitions away from the "ready state" when the guest
asks to isolate the device with the "set-indicator" RTAS call.

> Changing the name might be sensible, but I still think we want a
> common function for the two cases.
> 

I'll go for that. Maybe reverse the semantics: if "the DRC has no
attached device, or it has an attached device without a pending unplug
request", then it is in a steady state that doesn't require anything
special at CAS or migration time, e.g. spapr_drc_steady()?

> >  }
> >  
> >  static const VMStateDescription vmstate_spapr_drc = {
> > @@ -479,6 +498,10 @@ static const VMStateDescription vmstate_spapr_drc = {
> >      .fields  = (VMStateField []) {
> >          VMSTATE_UINT32(state, SpaprDrc),
> >          VMSTATE_END_OF_LIST()
> > +    },
> > +    .subsections = (const VMStateDescription * []) {
> > +        &vmstate_spapr_drc_unplug_requested,
> > +        NULL
> >      }
> >  };
> >  
> > 
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index d512ac6e1e7f..6f5cab70fc6b 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -455,6 +455,22 @@  void spapr_drc_reset(SpaprDrc *drc)
     }
 }
 
+static bool spapr_drc_unplug_requested_needed(void *opaque)
+{
+    return spapr_drc_unplug_requested(opaque);
+}
+
+static const VMStateDescription vmstate_spapr_drc_unplug_requested = {
+    .name = "spapr_drc/unplug_requested",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_drc_unplug_requested_needed,
+    .fields  = (VMStateField []) {
+        VMSTATE_BOOL(unplug_requested, SpaprDrc),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static bool spapr_drc_needed(void *opaque)
 {
     SpaprDrc *drc = (SpaprDrc *)opaque;
@@ -467,8 +483,11 @@  static bool spapr_drc_needed(void *opaque)
     /*
      * We need to migrate the state if it's not equal to the expected
      * long-term state, which is the same as the coldplugged initial
-     * state */
-    return !spapr_drc_device_ready(drc);
+     * state, or if an unplug request is pending.
+     */
+    return
+        spapr_drc_unplug_requested_needed(drc) ||
+        !spapr_drc_device_ready(drc);
 }
 
 static const VMStateDescription vmstate_spapr_drc = {
@@ -479,6 +498,10 @@  static const VMStateDescription vmstate_spapr_drc = {
     .fields  = (VMStateField []) {
         VMSTATE_UINT32(state, SpaprDrc),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_spapr_drc_unplug_requested,
+        NULL
     }
 };