diff mbox series

[v3,1/2] spapr: Disable legacy virtio devices for pseries-5.0 and later

Message ID 20200305043009.611636-2-david@gibson.dropbear.id.au
State New, archived
Headers show
Series spapr: Use vIOMMU translation for virtio by default | expand

Commit Message

David Gibson March 5, 2020, 4:30 a.m. UTC
PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
the guess mostly like classic PCI, even if some of the individual
devices on the bus are PCI Express.  One consequence of that is that
virtio-pci devices still default to being in transitional mode, though
legacy mode is now disabled by default on current q35 x86 machine
types.

Legacy mode virtio devices aren't really necessary any more, and are
causing some problems for future changes.  Therefore, for the
pseries-5.0 machine type (and onwards), switch to modern-only
virtio-pci devices by default.

This does mean we no longer support guest kernels prior to 4.0, unless
they have modern virtio support backported (which some distro kernels
like that in RHEL7 do).

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Greg Kurz March 5, 2020, 10:31 a.m. UTC | #1
On Thu,  5 Mar 2020 15:30:08 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> the guess mostly like classic PCI, even if some of the individual
> devices on the bus are PCI Express.  One consequence of that is that
> virtio-pci devices still default to being in transitional mode, though
> legacy mode is now disabled by default on current q35 x86 machine
> types.
> 
> Legacy mode virtio devices aren't really necessary any more, and are
> causing some problems for future changes.  Therefore, for the
> pseries-5.0 machine type (and onwards), switch to modern-only
> virtio-pci devices by default.
> 
> This does mean we no longer support guest kernels prior to 4.0, unless
> they have modern virtio support backported (which some distro kernels
> like that in RHEL7 do).
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---

Reviewed-by: Greg Kurz <groug@kaod.org>

FWIW, I could test the following:
- allows a RHEL7 guest with pre 4.0 kernel to boot, as mentioned
  in the changelog
- breaks boot of older RHEL 6.10 guests as expected
- allows migration of older machine types to/from QEMU 4.2

Tested-by: Greg Kurz <groug@kaod.org>

>  hw/ppc/spapr.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 2eb0d8f70d..3cfc98ac61 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -65,6 +65,7 @@
>  
>  #include "hw/pci/pci.h"
>  #include "hw/scsi/scsi.h"
> +#include "hw/virtio/virtio-pci.h"
>  #include "hw/virtio/virtio-scsi.h"
>  #include "hw/virtio/vhost-scsi-common.h"
>  
> @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
>  
>  static void spapr_machine_latest_class_options(MachineClass *mc)
>  {
> +    /*
> +     * Most defaults for the latest behaviour are inherited from the
> +     * base class, but we need to override the (non ppc specific)
> +     * default behaviour for virtio.  We can't do that from the base
> +     * class since it doesn't have a compat_props.
> +     */
> +    static GlobalProperty compat[] = {
> +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> +    };
> +
>      mc->alias = "pseries";
>      mc->is_default = true;
> +
> +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
>  }
>  
>  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
>  static void spapr_machine_4_2_class_options(MachineClass *mc)
>  {
>      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> +    static GlobalProperty compat[] = {
> +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> +    };
>  
>      spapr_machine_5_0_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
>      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>      smc->rma_limit = 16 * GiB;
>      mc->nvdimm_supported = false;
> +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
>  }
>  
>  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
Greg Kurz March 10, 2020, 9:43 a.m. UTC | #2
On Thu, 5 Mar 2020 11:31:46 +0100
Greg Kurz <groug@kaod.org> wrote:

> On Thu,  5 Mar 2020 15:30:08 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> > the guess mostly like classic PCI, even if some of the individual
> > devices on the bus are PCI Express.  One consequence of that is that
> > virtio-pci devices still default to being in transitional mode, though
> > legacy mode is now disabled by default on current q35 x86 machine
> > types.
> > 
> > Legacy mode virtio devices aren't really necessary any more, and are
> > causing some problems for future changes.  Therefore, for the
> > pseries-5.0 machine type (and onwards), switch to modern-only
> > virtio-pci devices by default.
> > 
> > This does mean we no longer support guest kernels prior to 4.0, unless
> > they have modern virtio support backported (which some distro kernels
> > like that in RHEL7 do).
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> FWIW, I could test the following:
> - allows a RHEL7 guest with pre 4.0 kernel to boot, as mentioned
>   in the changelog
> - breaks boot of older RHEL 6.10 guests as expected
> - allows migration of older machine types to/from QEMU 4.2
> 
> Tested-by: Greg Kurz <groug@kaod.org>
> 

Wait... I gave a try to virtiofsd and there's a problem:

$ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci
Unexpected error in object_property_find() at /home/greg/Work/qemu/qemu-ppc/qom/object.c:1231:
qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
Aborted (core dumped)

It is still not possible to set the disable-legacy prop on the
vhost-user-fs-pci device, even without this patch, but QEMU
doesn't abort:

$ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci -global virtio-pci.disable-legacy=on
qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
$

It seems to be related to the fact that vhost-user-fs-pci is a non-transitional
only device, as shown with this workaround:

--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4574,7 +4574,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
      * class since it doesn't have a compat_props.
      */
     static GlobalProperty compat[] = {
-        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
+        { TYPE_VIRTIO_PCI "-transitional", "disable-legacy", "on", },
     };
 
     mc->alias = "pseries";


but there's probably a better way to address this.

MST, Any suggestion ?

> >  hw/ppc/spapr.c | 17 +++++++++++++++++
> >  1 file changed, 17 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 2eb0d8f70d..3cfc98ac61 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -65,6 +65,7 @@
> >  
> >  #include "hw/pci/pci.h"
> >  #include "hw/scsi/scsi.h"
> > +#include "hw/virtio/virtio-pci.h"
> >  #include "hw/virtio/virtio-scsi.h"
> >  #include "hw/virtio/vhost-scsi-common.h"
> >  
> > @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
> >  
> >  static void spapr_machine_latest_class_options(MachineClass *mc)
> >  {
> > +    /*
> > +     * Most defaults for the latest behaviour are inherited from the
> > +     * base class, but we need to override the (non ppc specific)
> > +     * default behaviour for virtio.  We can't do that from the base
> > +     * class since it doesn't have a compat_props.
> > +     */
> > +    static GlobalProperty compat[] = {
> > +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > +    };
> > +
> >      mc->alias = "pseries";
> >      mc->is_default = true;
> > +
> > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> >  }
> >  
> >  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> > @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
> >  static void spapr_machine_4_2_class_options(MachineClass *mc)
> >  {
> >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > +    static GlobalProperty compat[] = {
> > +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> > +    };
> >  
> >      spapr_machine_5_0_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> > @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
> >      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >      smc->rma_limit = 16 * GiB;
> >      mc->nvdimm_supported = false;
> > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
>
Michael S. Tsirkin March 10, 2020, 9:57 a.m. UTC | #3
On Tue, Mar 10, 2020 at 10:43:29AM +0100, Greg Kurz wrote:
> On Thu, 5 Mar 2020 11:31:46 +0100
> Greg Kurz <groug@kaod.org> wrote:
> 
> > On Thu,  5 Mar 2020 15:30:08 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> > > the guess mostly like classic PCI, even if some of the individual
> > > devices on the bus are PCI Express.  One consequence of that is that
> > > virtio-pci devices still default to being in transitional mode, though
> > > legacy mode is now disabled by default on current q35 x86 machine
> > > types.
> > > 
> > > Legacy mode virtio devices aren't really necessary any more, and are
> > > causing some problems for future changes.  Therefore, for the
> > > pseries-5.0 machine type (and onwards), switch to modern-only
> > > virtio-pci devices by default.
> > > 
> > > This does mean we no longer support guest kernels prior to 4.0, unless
> > > they have modern virtio support backported (which some distro kernels
> > > like that in RHEL7 do).
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > 
> > Reviewed-by: Greg Kurz <groug@kaod.org>
> > 
> > FWIW, I could test the following:
> > - allows a RHEL7 guest with pre 4.0 kernel to boot, as mentioned
> >   in the changelog
> > - breaks boot of older RHEL 6.10 guests as expected
> > - allows migration of older machine types to/from QEMU 4.2
> > 
> > Tested-by: Greg Kurz <groug@kaod.org>
> > 
> 
> Wait... I gave a try to virtiofsd and there's a problem:
> 
> $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci
> Unexpected error in object_property_find() at /home/greg/Work/qemu/qemu-ppc/qom/object.c:1231:
> qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> Aborted (core dumped)
> 
> It is still not possible to set the disable-legacy prop on the
> vhost-user-fs-pci device, even without this patch, but QEMU
> doesn't abort:
> 
> $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci -global virtio-pci.disable-legacy=on
> qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> $
> 
> It seems to be related to the fact that vhost-user-fs-pci is a non-transitional
> only device, as shown with this workaround:
> 
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4574,7 +4574,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
>       * class since it doesn't have a compat_props.
>       */
>      static GlobalProperty compat[] = {
> -        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> +        { TYPE_VIRTIO_PCI "-transitional", "disable-legacy", "on", },
>      };
>  
>      mc->alias = "pseries";
> 
> 
> but there's probably a better way to address this.
> 
> MST, Any suggestion ?



Ugh looks like we have two types named virtio-pci?

Something's very wrong ...

> > >  hw/ppc/spapr.c | 17 +++++++++++++++++
> > >  1 file changed, 17 insertions(+)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 2eb0d8f70d..3cfc98ac61 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -65,6 +65,7 @@
> > >  
> > >  #include "hw/pci/pci.h"
> > >  #include "hw/scsi/scsi.h"
> > > +#include "hw/virtio/virtio-pci.h"
> > >  #include "hw/virtio/virtio-scsi.h"
> > >  #include "hw/virtio/vhost-scsi-common.h"
> > >  
> > > @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
> > >  
> > >  static void spapr_machine_latest_class_options(MachineClass *mc)
> > >  {
> > > +    /*
> > > +     * Most defaults for the latest behaviour are inherited from the
> > > +     * base class, but we need to override the (non ppc specific)
> > > +     * default behaviour for virtio.  We can't do that from the base
> > > +     * class since it doesn't have a compat_props.
> > > +     */
> > > +    static GlobalProperty compat[] = {
> > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > > +    };
> > > +
> > >      mc->alias = "pseries";
> > >      mc->is_default = true;
> > > +
> > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > >  }
> > >  
> > >  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> > > @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
> > >  static void spapr_machine_4_2_class_options(MachineClass *mc)
> > >  {
> > >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > > +    static GlobalProperty compat[] = {
> > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> > > +    };
> > >  
> > >      spapr_machine_5_0_class_options(mc);
> > >      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> > > @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
> > >      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> > >      smc->rma_limit = 16 * GiB;
> > >      mc->nvdimm_supported = false;
> > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > >  }
> > >  
> > >  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
> >
Michael S. Tsirkin March 10, 2020, 11:03 a.m. UTC | #4
On Tue, Mar 10, 2020 at 10:43:29AM +0100, Greg Kurz wrote:
> On Thu, 5 Mar 2020 11:31:46 +0100
> Greg Kurz <groug@kaod.org> wrote:
> 
> > On Thu,  5 Mar 2020 15:30:08 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> > > the guess mostly like classic PCI, even if some of the individual
> > > devices on the bus are PCI Express.  One consequence of that is that
> > > virtio-pci devices still default to being in transitional mode, though
> > > legacy mode is now disabled by default on current q35 x86 machine
> > > types.
> > > 
> > > Legacy mode virtio devices aren't really necessary any more, and are
> > > causing some problems for future changes.  Therefore, for the
> > > pseries-5.0 machine type (and onwards), switch to modern-only
> > > virtio-pci devices by default.
> > > 
> > > This does mean we no longer support guest kernels prior to 4.0, unless
> > > they have modern virtio support backported (which some distro kernels
> > > like that in RHEL7 do).
> > > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > 
> > Reviewed-by: Greg Kurz <groug@kaod.org>
> > 
> > FWIW, I could test the following:
> > - allows a RHEL7 guest with pre 4.0 kernel to boot, as mentioned
> >   in the changelog
> > - breaks boot of older RHEL 6.10 guests as expected
> > - allows migration of older machine types to/from QEMU 4.2
> > 
> > Tested-by: Greg Kurz <groug@kaod.org>
> > 
> 
> Wait... I gave a try to virtiofsd and there's a problem:
> 
> $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci
> Unexpected error in object_property_find() at /home/greg/Work/qemu/qemu-ppc/qom/object.c:1231:
> qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> Aborted (core dumped)
> 
> It is still not possible to set the disable-legacy prop on the
> vhost-user-fs-pci device, even without this patch, but QEMU
> doesn't abort:
> 
> $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci -global virtio-pci.disable-legacy=on
> qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> $
> 
> It seems to be related to the fact that vhost-user-fs-pci is a non-transitional
> only device, as shown with this workaround:
> 
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4574,7 +4574,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
>       * class since it doesn't have a compat_props.
>       */
>      static GlobalProperty compat[] = {
> -        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> +        { TYPE_VIRTIO_PCI "-transitional", "disable-legacy", "on", },
>      };
>  
>      mc->alias = "pseries";

Does this actually help? There's no type virtio-pci-transitional, is
there?

> 
> but there's probably a better way to address this.
> 
> MST, Any suggestion ?

Hmm I'm not sure how to fix this properly. The only idea that comes
to mind is a new internal-only "x-disable-legacy" that would be on virtio-pci,
duplicating functionality of disable-legacy but intended for
globals like this.



> > >  hw/ppc/spapr.c | 17 +++++++++++++++++
> > >  1 file changed, 17 insertions(+)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 2eb0d8f70d..3cfc98ac61 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -65,6 +65,7 @@
> > >  
> > >  #include "hw/pci/pci.h"
> > >  #include "hw/scsi/scsi.h"
> > > +#include "hw/virtio/virtio-pci.h"
> > >  #include "hw/virtio/virtio-scsi.h"
> > >  #include "hw/virtio/vhost-scsi-common.h"
> > >  
> > > @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
> > >  
> > >  static void spapr_machine_latest_class_options(MachineClass *mc)
> > >  {
> > > +    /*
> > > +     * Most defaults for the latest behaviour are inherited from the
> > > +     * base class, but we need to override the (non ppc specific)
> > > +     * default behaviour for virtio.  We can't do that from the base
> > > +     * class since it doesn't have a compat_props.
> > > +     */
> > > +    static GlobalProperty compat[] = {
> > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > > +    };
> > > +
> > >      mc->alias = "pseries";
> > >      mc->is_default = true;
> > > +
> > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > >  }
> > >  
> > >  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> > > @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
> > >  static void spapr_machine_4_2_class_options(MachineClass *mc)
> > >  {
> > >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > > +    static GlobalProperty compat[] = {
> > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> > > +    };
> > >  
> > >      spapr_machine_5_0_class_options(mc);
> > >      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> > > @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
> > >      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> > >      smc->rma_limit = 16 * GiB;
> > >      mc->nvdimm_supported = false;
> > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > >  }
> > >  
> > >  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
> >
Daniel P. Berrangé March 10, 2020, 11:56 a.m. UTC | #5
On Thu, Mar 05, 2020 at 03:30:08PM +1100, David Gibson wrote:
> PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> the guess mostly like classic PCI, even if some of the individual
> devices on the bus are PCI Express.  One consequence of that is that
> virtio-pci devices still default to being in transitional mode, though
> legacy mode is now disabled by default on current q35 x86 machine
> types.

Two things to note here

x86 defaults to the i440fx  machine type, and so defaults
to transitional mode. AFAIK, only RHEL-8 downstream changed
x86 to defualt to q35

With q35 whether you get transitional mode or not is actually
dependent on where you place the device. If it is placed into
a PCIe root port, then it is modern-only. If it is placed into
a PCI bridge, then it is transitional still.

> Legacy mode virtio devices aren't really necessary any more, and are

Legacy mode is required for RHEL-6 which has not reached EOL yet.

> causing some problems for future changes.  Therefore, for the
> pseries-5.0 machine type (and onwards), switch to modern-only
> virtio-pci devices by default.


The challenge I see with pseries, as compared to x86 is around
how apps deal with mgmt / guest setup.  With x86 there are
distinct machine types i440fx / q35, so mgmt apps could decide
what todo based on the chipset & device support. eg they can
determine whether the guest supports PCIe at all, and they
can determine whether the guest supports virtio-1.0. Thus
they can decide between three options

 - Use i440fx
 - Use q35 with placement in PCI bridge
 - Use q35 with placement in PCIe root port

These rules applies no matter what version of q35/i440fx
is in use.

With PPC, we're changing behaviour of the existing pseries
machine type in a minor version. Management apps need to
avoid creating logic that depends on a specific minor version
because these version numbers are all changed by downstream
distro vendors. IOW, as a comparison to x86, this change is
akin to altering behaviour of the i440fx machine type so
that it disables legacy mode despite still being PCI, and
not PCIe.

Is there any liklihood we'll ever introduce a true PCIe
based machine type for PPC, so we get something much
closer to x86/aarch64 machine types in terms of PCIe
architecture ?

> This does mean we no longer support guest kernels prior to 4.0, unless
> they have modern virtio support backported (which some distro kernels
> like that in RHEL7 do).
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 2eb0d8f70d..3cfc98ac61 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -65,6 +65,7 @@
>  
>  #include "hw/pci/pci.h"
>  #include "hw/scsi/scsi.h"
> +#include "hw/virtio/virtio-pci.h"
>  #include "hw/virtio/virtio-scsi.h"
>  #include "hw/virtio/vhost-scsi-common.h"
>  
> @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
>  
>  static void spapr_machine_latest_class_options(MachineClass *mc)
>  {
> +    /*
> +     * Most defaults for the latest behaviour are inherited from the
> +     * base class, but we need to override the (non ppc specific)
> +     * default behaviour for virtio.  We can't do that from the base
> +     * class since it doesn't have a compat_props.
> +     */
> +    static GlobalProperty compat[] = {
> +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> +    };
> +
>      mc->alias = "pseries";
>      mc->is_default = true;
> +
> +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
>  }
>  
>  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
>  static void spapr_machine_4_2_class_options(MachineClass *mc)
>  {
>      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> +    static GlobalProperty compat[] = {
> +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> +    };
>  
>      spapr_machine_5_0_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
>      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>      smc->rma_limit = 16 * GiB;
>      mc->nvdimm_supported = false;
> +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
>  }
>  
>  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
> -- 
> 2.24.1
> 
> 

Regards,
Daniel
Greg Kurz March 10, 2020, 12:24 p.m. UTC | #6
On Tue, 10 Mar 2020 07:03:34 -0400
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Tue, Mar 10, 2020 at 10:43:29AM +0100, Greg Kurz wrote:
> > On Thu, 5 Mar 2020 11:31:46 +0100
> > Greg Kurz <groug@kaod.org> wrote:
> > 
> > > On Thu,  5 Mar 2020 15:30:08 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> > > > the guess mostly like classic PCI, even if some of the individual
> > > > devices on the bus are PCI Express.  One consequence of that is that
> > > > virtio-pci devices still default to being in transitional mode, though
> > > > legacy mode is now disabled by default on current q35 x86 machine
> > > > types.
> > > > 
> > > > Legacy mode virtio devices aren't really necessary any more, and are
> > > > causing some problems for future changes.  Therefore, for the
> > > > pseries-5.0 machine type (and onwards), switch to modern-only
> > > > virtio-pci devices by default.
> > > > 
> > > > This does mean we no longer support guest kernels prior to 4.0, unless
> > > > they have modern virtio support backported (which some distro kernels
> > > > like that in RHEL7 do).
> > > > 
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > 
> > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > 
> > > FWIW, I could test the following:
> > > - allows a RHEL7 guest with pre 4.0 kernel to boot, as mentioned
> > >   in the changelog
> > > - breaks boot of older RHEL 6.10 guests as expected
> > > - allows migration of older machine types to/from QEMU 4.2
> > > 
> > > Tested-by: Greg Kurz <groug@kaod.org>
> > > 
> > 
> > Wait... I gave a try to virtiofsd and there's a problem:
> > 
> > $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci
> > Unexpected error in object_property_find() at /home/greg/Work/qemu/qemu-ppc/qom/object.c:1231:
> > qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> > Aborted (core dumped)
> > 
> > It is still not possible to set the disable-legacy prop on the
> > vhost-user-fs-pci device, even without this patch, but QEMU
> > doesn't abort:
> > 
> > $ ./ppc64-softmmu/qemu-system-ppc64 -device vhost-user-fs-pci -global virtio-pci.disable-legacy=on
> > qemu-system-ppc64: -device vhost-user-fs-pci: can't apply global virtio-pci.disable-legacy=on: Property '.disable-legacy' not found
> > $
> > 
> > It seems to be related to the fact that vhost-user-fs-pci is a non-transitional
> > only device, as shown with this workaround:
> > 
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -4574,7 +4574,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
> >       * class since it doesn't have a compat_props.
> >       */
> >      static GlobalProperty compat[] = {
> > -        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > +        { TYPE_VIRTIO_PCI "-transitional", "disable-legacy", "on", },
> >      };
> >  
> >      mc->alias = "pseries";
> 
> Does this actually help? There's no type virtio-pci-transitional, is
> there?
> 

Oops you're right. There's no such type and it doesn't help in
the end... I should have tried that with -global before posting

   -global virtio-pci-transitional.disable-legacy=on

qemu-system-ppc64: warning: global virtio-pci-transitional.disable-legacy has invalid class name

Maybe worth adding a warning as well when done from the machine code, but
this is another story. Sorry for the noise.

> > 
> > but there's probably a better way to address this.
> > 
> > MST, Any suggestion ?
> 
> Hmm I'm not sure how to fix this properly. The only idea that comes
> to mind is a new internal-only "x-disable-legacy" that would be on virtio-pci,
> duplicating functionality of disable-legacy but intended for
> globals like this.
> 

Maybe but Daniel P. Berrangé jumped in and has some arguments
against what we're trying to achieve with this series...

A suivre.

> 
> 
> > > >  hw/ppc/spapr.c | 17 +++++++++++++++++
> > > >  1 file changed, 17 insertions(+)
> > > > 
> > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > index 2eb0d8f70d..3cfc98ac61 100644
> > > > --- a/hw/ppc/spapr.c
> > > > +++ b/hw/ppc/spapr.c
> > > > @@ -65,6 +65,7 @@
> > > >  
> > > >  #include "hw/pci/pci.h"
> > > >  #include "hw/scsi/scsi.h"
> > > > +#include "hw/virtio/virtio-pci.h"
> > > >  #include "hw/virtio/virtio-scsi.h"
> > > >  #include "hw/virtio/vhost-scsi-common.h"
> > > >  
> > > > @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
> > > >  
> > > >  static void spapr_machine_latest_class_options(MachineClass *mc)
> > > >  {
> > > > +    /*
> > > > +     * Most defaults for the latest behaviour are inherited from the
> > > > +     * base class, but we need to override the (non ppc specific)
> > > > +     * default behaviour for virtio.  We can't do that from the base
> > > > +     * class since it doesn't have a compat_props.
> > > > +     */
> > > > +    static GlobalProperty compat[] = {
> > > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > > > +    };
> > > > +
> > > >      mc->alias = "pseries";
> > > >      mc->is_default = true;
> > > > +
> > > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > > >  }
> > > >  
> > > >  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> > > > @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
> > > >  static void spapr_machine_4_2_class_options(MachineClass *mc)
> > > >  {
> > > >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > > > +    static GlobalProperty compat[] = {
> > > > +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> > > > +    };
> > > >  
> > > >      spapr_machine_5_0_class_options(mc);
> > > >      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> > > > @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
> > > >      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> > > >      smc->rma_limit = 16 * GiB;
> > > >      mc->nvdimm_supported = false;
> > > > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> > > >  }
> > > >  
> > > >  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
> > > 
>
David Gibson March 11, 2020, 12:58 a.m. UTC | #7
On Tue, Mar 10, 2020 at 11:56:11AM +0000, Daniel P. Berrangé wrote:
> On Thu, Mar 05, 2020 at 03:30:08PM +1100, David Gibson wrote:
> > PAPR specifies a kind of odd, paravirtualized PCI bus, which looks to
> > the guess mostly like classic PCI, even if some of the individual
> > devices on the bus are PCI Express.  One consequence of that is that
> > virtio-pci devices still default to being in transitional mode, though
> > legacy mode is now disabled by default on current q35 x86 machine
> > types.
> 
> Two things to note here
> 
> x86 defaults to the i440fx  machine type, and so defaults
> to transitional mode. AFAIK, only RHEL-8 downstream changed
> x86 to defualt to q35
> 
> With q35 whether you get transitional mode or not is actually
> dependent on where you place the device. If it is placed into
> a PCIe root port, then it is modern-only. If it is placed into
> a PCI bridge, then it is transitional still.

Yes, I'm aware.

> > Legacy mode virtio devices aren't really necessary any more, and are
> 
> Legacy mode is required for RHEL-6 which has not reached EOL yet.

Yeah.. I'm concerned about this, but I'm not sure what to do about it.

> > causing some problems for future changes.  Therefore, for the
> > pseries-5.0 machine type (and onwards), switch to modern-only
> > virtio-pci devices by default.
> 
> The challenge I see with pseries, as compared to x86 is around
> how apps deal with mgmt / guest setup.  With x86 there are
> distinct machine types i440fx / q35, so mgmt apps could decide
> what todo based on the chipset & device support. eg they can
> determine whether the guest supports PCIe at all, and they
> can determine whether the guest supports virtio-1.0. Thus
> they can decide between three options
> 
>  - Use i440fx
>  - Use q35 with placement in PCI bridge
>  - Use q35 with placement in PCIe root port
> 
> These rules applies no matter what version of q35/i440fx
> is in use.

Yeah.. x86 also has the advantage of enough visibility that it can
reasonbly easily get mgmt layers to do stuff about it.  This is much
harder for POWER :/.

> With PPC, we're changing behaviour of the existing pseries
> machine type in a minor version. Management apps need to
> avoid creating logic that depends on a specific minor version
> because these version numbers are all changed by downstream
> distro vendors. IOW, as a comparison to x86, this change is
> akin to altering behaviour of the i440fx machine type so
> that it disables legacy mode despite still being PCI, and
> not PCIe.
> 
> Is there any liklihood we'll ever introduce a true PCIe
> based machine type for PPC, so we get something much
> closer to x86/aarch64 machine types in terms of PCIe
> architecture ?

It doesn't really make sense to do so.  For x86 - or more strictly for
pc - the change from PCI to PCIe is a pretty fundamental system change
with affects in lots of places, which makes a whole new versioned
series of machine types a reasonable option.

For pseries - that's not really the case.  PCI under PAPR is
paravirtualized, and it always has been.  The interface we're matching
is not real hardware, but the PAPR spec and to a lesser extent the
existing PowerVM implementation of it.

[Aside: you've made a subtle but common x86-centric assumption above
 that there's only one important platform design per ISA.  There
 is a real PCIe based PPC  machine type in "pnv" (and maybe some of
 the embedded ones as well), but that's not the environment we care
 about for guests in production, since we can't use KVM with it]

What PAPR has is an odd hybrid - individual devices can be PCIe (we
have calls to access extended config space) - but the overall bus
structure is more-or-less like vanilla PCI.

I think it would be possible to kind of expose a more PCIe like
structure, but a) it would be weirdly artificial, b) it doesn't match
the PAPR interfaces very well, c) it would make our behaviour
different from PowerVM.

It would certainly be possible to better handle PCIe devices on a root
bus.  That's been on my todo someday list for ages, but I've kept
putting it off because the tangible benefits are pretty minimal.

Note that several things that I believe are now in the PCIe spec, but
really derive more from PC legacy considerations, don't apply at all
for PAPR.  e.g. there's no meaningful distinction between integrated
and slotted devices, multiple independent host bridges is routine and
doesn't require any (virtual) hardware visible domain numbers.

> > This does mean we no longer support guest kernels prior to 4.0, unless
> > they have modern virtio support backported (which some distro kernels
> > like that in RHEL7 do).
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c | 17 +++++++++++++++++
> >  1 file changed, 17 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 2eb0d8f70d..3cfc98ac61 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -65,6 +65,7 @@
> >  
> >  #include "hw/pci/pci.h"
> >  #include "hw/scsi/scsi.h"
> > +#include "hw/virtio/virtio-pci.h"
> >  #include "hw/virtio/virtio-scsi.h"
> >  #include "hw/virtio/vhost-scsi-common.h"
> >  
> > @@ -4566,8 +4567,20 @@ static const TypeInfo spapr_machine_info = {
> >  
> >  static void spapr_machine_latest_class_options(MachineClass *mc)
> >  {
> > +    /*
> > +     * Most defaults for the latest behaviour are inherited from the
> > +     * base class, but we need to override the (non ppc specific)
> > +     * default behaviour for virtio.  We can't do that from the base
> > +     * class since it doesn't have a compat_props.
> > +     */
> > +    static GlobalProperty compat[] = {
> > +        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
> > +    };
> > +
> >      mc->alias = "pseries";
> >      mc->is_default = true;
> > +
> > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> >  }
> >  
> >  #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
> > @@ -4607,6 +4620,9 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
> >  static void spapr_machine_4_2_class_options(MachineClass *mc)
> >  {
> >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > +    static GlobalProperty compat[] = {
> > +        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
> > +    };
> >  
> >      spapr_machine_5_0_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
> > @@ -4614,6 +4630,7 @@ static void spapr_machine_4_2_class_options(MachineClass *mc)
> >      smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >      smc->rma_limit = 16 * GiB;
> >      mc->nvdimm_supported = false;
> > +    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
> 
> Regards,
> Daniel
Michael S. Tsirkin March 11, 2020, 7:11 a.m. UTC | #8
On Wed, Mar 11, 2020 at 11:58:57AM +1100, David Gibson wrote:
> Note that several things that I believe are now in the PCIe spec, but
> really derive more from PC legacy considerations, don't apply at all
> for PAPR.  e.g. there's no meaningful distinction between integrated
> and slotted devices, multiple independent host bridges is routine and
> doesn't require any (virtual) hardware visible domain numbers.

Domain numbers are a Linux thing, not a PCIe thing. On x86 they come
from ACPI segment numbers. As such they aren't usually hardware
visible on x86, they are supplied by firmware.
David Gibson March 12, 2020, 1:14 a.m. UTC | #9
On Wed, Mar 11, 2020 at 03:11:16AM -0400, Michael S. Tsirkin wrote:
> On Wed, Mar 11, 2020 at 11:58:57AM +1100, David Gibson wrote:
> > Note that several things that I believe are now in the PCIe spec, but
> > really derive more from PC legacy considerations, don't apply at all
> > for PAPR.  e.g. there's no meaningful distinction between integrated
> > and slotted devices, multiple independent host bridges is routine and
> > doesn't require any (virtual) hardware visible domain numbers.
> 
> Domain numbers are a Linux thing, not a PCIe thing. On x86 they come
> from ACPI segment numbers. As such they aren't usually hardware
> visible on x86, they are supplied by firmware.

Oh, ok.  I thought that at least on the standard IO 0xcf8 host bridge
controller the domain number was written into certain registers to
select the relevant root bus.

On POWER the domain numbers are arbitrarily assigned within Linux.
"Hardware" (well, the firmware/hypervisor) uses a different
identifier, called the BUID (generally a large, 64-bit pseudo-address)
in the device tree and hypercalls.

[As an aside, this means the use of domain numbers in libvirt XML is
complete bogosity]
Michael S. Tsirkin March 12, 2020, 6:41 a.m. UTC | #10
On Thu, Mar 12, 2020 at 12:14:20PM +1100, David Gibson wrote:
> On Wed, Mar 11, 2020 at 03:11:16AM -0400, Michael S. Tsirkin wrote:
> > On Wed, Mar 11, 2020 at 11:58:57AM +1100, David Gibson wrote:
> > > Note that several things that I believe are now in the PCIe spec, but
> > > really derive more from PC legacy considerations, don't apply at all
> > > for PAPR.  e.g. there's no meaningful distinction between integrated
> > > and slotted devices, multiple independent host bridges is routine and
> > > doesn't require any (virtual) hardware visible domain numbers.
> > 
> > Domain numbers are a Linux thing, not a PCIe thing. On x86 they come
> > from ACPI segment numbers. As such they aren't usually hardware
> > visible on x86, they are supplied by firmware.
> 
> Oh, ok.  I thought that at least on the standard IO 0xcf8 host bridge
> controller the domain number was written into certain registers to
> select the relevant root bus.

standard 0xcf8 can only access 256 bus numbers. software does not
much care on which root bus these are though.

> On POWER the domain numbers are arbitrarily assigned within Linux.
> "Hardware" (well, the firmware/hypervisor) uses a different
> identifier, called the BUID (generally a large, 64-bit pseudo-address)
> in the device tree and hypercalls.
> 
> [As an aside, this means the use of domain numbers in libvirt XML is
> complete bogosity]

For fun, they aren't actually used either. And of course using the word
"domain" in a domain XML format means you can't search for it anywhere
without getting a million unrelated hits.



> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2eb0d8f70d..3cfc98ac61 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -65,6 +65,7 @@ 
 
 #include "hw/pci/pci.h"
 #include "hw/scsi/scsi.h"
+#include "hw/virtio/virtio-pci.h"
 #include "hw/virtio/virtio-scsi.h"
 #include "hw/virtio/vhost-scsi-common.h"
 
@@ -4566,8 +4567,20 @@  static const TypeInfo spapr_machine_info = {
 
 static void spapr_machine_latest_class_options(MachineClass *mc)
 {
+    /*
+     * Most defaults for the latest behaviour are inherited from the
+     * base class, but we need to override the (non ppc specific)
+     * default behaviour for virtio.  We can't do that from the base
+     * class since it doesn't have a compat_props.
+     */
+    static GlobalProperty compat[] = {
+        { TYPE_VIRTIO_PCI, "disable-legacy", "on", },
+    };
+
     mc->alias = "pseries";
     mc->is_default = true;
+
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 #define DEFINE_SPAPR_MACHINE(suffix, verstr, latest)                 \
@@ -4607,6 +4620,9 @@  DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
 static void spapr_machine_4_2_class_options(MachineClass *mc)
 {
     SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+    static GlobalProperty compat[] = {
+        { TYPE_VIRTIO_PCI, "disable-legacy", "auto" },
+    };
 
     spapr_machine_5_0_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
@@ -4614,6 +4630,7 @@  static void spapr_machine_4_2_class_options(MachineClass *mc)
     smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
     smc->rma_limit = 16 * GiB;
     mc->nvdimm_supported = false;
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 
 DEFINE_SPAPR_MACHINE(4_2, "4.2", false);