
[v2] hw/arm/virt enable support for virtio-mem

Message ID 20201105174311.566751-1-Jonathan.Cameron@huawei.com (mailing list archive)
State New, archived
Series [v2] hw/arm/virt enable support for virtio-mem

Commit Message

Jonathan Cameron Nov. 5, 2020, 5:43 p.m. UTC
Basically a cut and paste job from the x86 support, with the exception of
needing a larger block size, as the Memory Block Size (MIN_SECTION_SIZE)
on ARM64 in Linux is 1G.

Tested:
* In full emulation and with KVM on an arm64 server.
* Cold plug and hotplug of the virtio-mem-pci device.
* Wide range of memory sizes, added at creation and later.
* Fairly basic memory usage of memory added.  Seems to function as normal.
* NUMA setup with virtio-mem-pci devices on each node.
* Simple migration test.

The related kernel patch just enables the Kconfig item for ARM64 as an
alternative to x86 in drivers/virtio/Kconfig.

The original patches from David Hildenbrand stated that he thought it should
work for ARM64 but it wasn't enabled in the kernel [1].
It appears he was correct and everything 'just works'.

The build system related stuff is intended to ensure virtio-mem support is
not built for arm32 (the build will fail due to no defined block size).
If there is a more elegant way to do this, please point me in the right
direction.

[1] https://lore.kernel.org/linux-mm/20191212171137.13872-1-david@redhat.com/

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 default-configs/devices/aarch64-softmmu.mak |  1 +
 hw/arm/Kconfig                              |  1 +
 hw/arm/virt.c                               | 64 ++++++++++++++++++++-
 hw/virtio/Kconfig                           |  4 ++
 hw/virtio/virtio-mem.c                      |  2 +
 5 files changed, 71 insertions(+), 1 deletion(-)

Comments

Philippe Mathieu-Daudé Nov. 5, 2020, 5:51 p.m. UTC | #1
Cc'ing Igor.

Eric Auger Nov. 9, 2020, 1:40 p.m. UTC | #2
Hi Jonathan,

On 11/5/20 6:43 PM, Jonathan Cameron wrote:
> Basically a cut and paste job from the x86 support with the exception of
> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> on ARM64 in Linux is 1G.
> 
> Tested:
> * In full emulation and with KVM on an arm64 server.
> * cold and hotplug for the virtio-mem-pci device.
> * Wide range of memory sizes, added at creation and later.
> * Fairly basic memory usage of memory added.  Seems to function as normal.
> * NUMA setup with virtio-mem-pci devices on each node.
> * Simple migration test.

I would add in the commit message that the hot-unplug of the device is
not supported.
> 
> Related kernel patch just enables the Kconfig item for ARM64 as an
> alternative to x86 in drivers/virtio/Kconfig
> 
> The original patches from David Hildenbrand stated that he thought it should
> work for ARM64 but it wasn't enabled in the kernel [1]
> It appears he was correct and everything 'just works'.
Did you try with a 64kB page guest as well?
> 
> The build system related stuff is intended to ensure virtio-mem support is
> not built for arm32 (build will fail due no defined block size).
> If there is a more elegant way to do this, please point me in the right
> direction.
I guess you meant the CONFIG_ARCH_VIRTIO_MEM_SUPPORTED introduction.
> 
> [1] https://lore.kernel.org/linux-mm/20191212171137.13872-1-david@redhat.com/
> 
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  default-configs/devices/aarch64-softmmu.mak |  1 +
>  hw/arm/Kconfig                              |  1 +
>  hw/arm/virt.c                               | 64 ++++++++++++++++++++-
>  hw/virtio/Kconfig                           |  4 ++
>  hw/virtio/virtio-mem.c                      |  2 +
>  5 files changed, 71 insertions(+), 1 deletion(-)
> 
> diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak
> index 958b1e08e4..31d6128a29 100644
> --- a/default-configs/devices/aarch64-softmmu.mak
> +++ b/default-configs/devices/aarch64-softmmu.mak
> @@ -6,3 +6,4 @@ include arm-softmmu.mak
>  CONFIG_XLNX_ZYNQMP_ARM=y
>  CONFIG_XLNX_VERSAL=y
>  CONFIG_SBSA_REF=y
> +CONFIG_ARCH_VIRTIO_MEM_SUPPORTED=y
> diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
> index fdf4464b94..eeae77eee9 100644
> --- a/hw/arm/Kconfig
> +++ b/hw/arm/Kconfig
> @@ -20,6 +20,7 @@ config ARM_VIRT
>      select PLATFORM_BUS
>      select SMBIOS
>      select VIRTIO_MMIO
> +    select VIRTIO_MEM_SUPPORTED if ARCH_VIRTIO_MEM_SUPPORTED
>      select ACPI_PCI
>      select MEM_DEVICE
>      select DIMM
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 8abb385d4e..6c96d71106 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -73,9 +73,11 @@
>  #include "hw/acpi/acpi.h"
>  #include "target/arm/internals.h"
>  #include "hw/mem/pc-dimm.h"
> +#include "hw/mem/memory-device.h"
>  #include "hw/mem/nvdimm.h"
>  #include "hw/acpi/generic_event_device.h"
>  #include "hw/virtio/virtio-iommu.h"
> +#include "hw/virtio/virtio-mem-pci.h"
>  #include "hw/char/pl011.h"
>  #include "qemu/guest-random.h"
>  
> @@ -2286,6 +2288,34 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
>                           dev, &error_abort);
>  }
>  
> +static void virt_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev,
> +                                      DeviceState *dev, Error **errp)
> +{
> +    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
> +    Error *local_err = NULL;
> +
> +    if (!hotplug_dev2 && dev->hotplugged) {
> +        /*
> +         * Without a bus hotplug handler, we cannot control the plug/unplug
> +         * order. We should never reach this point when hotplugging,
> +         * however, better add a safety net.
> +         */
> +        error_setg(errp, "hotplug of virtio-mem based memory devices not"
> +                   " supported on this bus.");
> +        return;
> +    }
> +    /*
> +     * First, see if we can plug this memory device at all. If that
> +     * succeeds, branch of to the actual hotplug handler.
> +     */
> +    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
> +                           &local_err);
> +    if (!local_err && hotplug_dev2) {
> +        hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err);
> +    }
> +    error_propagate(errp, local_err);
> +}
> +
>  static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
>                                              DeviceState *dev, Error **errp)
>  {
> @@ -2293,6 +2323,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
>  
>      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>          virt_memory_pre_plug(hotplug_dev, dev, errp);
> +    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> +        virt_virtio_md_pci_pre_plug(hotplug_dev, dev, errp);
>      } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
>          hwaddr db_start = 0, db_end = 0;
>          char *resv_prop_str;
> @@ -2322,6 +2354,27 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
>      }
>  }
>  
> +static void virt_virtio_md_pci_plug(HotplugHandler *hotplug_dev,
> +                                  DeviceState *dev, Error **errp)
> +{
> +    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
> +    Error *local_err = NULL;
> +
> +    /*
> +     * Plug the memory device first and then branch off to the actual
> +     * hotplug handler. If that one fails, we can easily undo the memory
> +     * device bits.
> +     */
> +    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
> +    if (hotplug_dev2) {
> +        hotplug_handler_plug(hotplug_dev2, dev, &local_err);
> +        if (local_err) {
> +            memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
> +        }
> +    }
> +    error_propagate(errp, local_err);
> +}
> +
>  static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>                                          DeviceState *dev, Error **errp)
>  {
> @@ -2336,6 +2389,9 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>          virt_memory_plug(hotplug_dev, dev, errp);
>      }
nit: while at it we can use "else if" here and below
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> +        virt_virtio_md_pci_plug(hotplug_dev, dev, errp);
> +    }
>      if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
>          PCIDevice *pdev = PCI_DEVICE(dev);
>  
> @@ -2363,6 +2419,11 @@ static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
>          goto out;
>      }
virt_dimm_unplug_request() is called if the device type is TYPE_PC_DIMM,
while here the device type is TYPE_VIRTIO_MEM_PCI.

Shouldn't this error be returned in
virt_machine_device_unplug_request_cb() instead, or, as is done in
pc.c, should a virt_virtio_md_pci_unplug_request() helper be added with the
error_setg() there?
>  
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> +        error_setg(&local_err,
> +                   "virtio-mem based memory devices cannot be unplugged.");
> +        goto out;
> +    }
>      hotplug_handler_unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev,
>                                     &local_err);
>  out:
> @@ -2413,7 +2474,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
>                                                          DeviceState *dev)
>  {
>      if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) ||
> -       (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) {
> +        object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
> +        object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
>          return HOTPLUG_HANDLER(machine);
>      }
>      if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
> diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
> index 0eda25c4e1..00dbf2939e 100644
> --- a/hw/virtio/Kconfig
> +++ b/hw/virtio/Kconfig
> @@ -48,6 +48,10 @@ config VIRTIO_PMEM
>      depends on VIRTIO_PMEM_SUPPORTED
>      select MEM_DEVICE
>  
> +config ARCH_VIRTIO_MEM_SUPPORTED
> +    bool
> +    default n
> +
>  config VIRTIO_MEM_SUPPORTED
>      bool
>  
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 7c8ca9f28b..16f9de6ab6 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -53,6 +53,8 @@
>   */
>  #if defined(TARGET_X86_64) || defined(TARGET_I386)
>  #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
> +#elif defined(TARGET_AARCH64)
> +#define VIRTIO_MEM_USABLE_EXTENT (2 * (1024 * MiB))
>  #else
>  #error VIRTIO_MEM_USABLE_EXTENT not defined
>  #endif
> 
Otherwise looks good to me.

Thanks!

Eric
David Hildenbrand Nov. 9, 2020, 7:47 p.m. UTC | #3
On 05.11.20 18:43, Jonathan Cameron wrote:
> Basically a cut and paste job from the x86 support with the exception of
> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> on ARM64 in Linux is 1G.
> 
> Tested:
> * In full emulation and with KVM on an arm64 server.
> * cold and hotplug for the virtio-mem-pci device.
> * Wide range of memory sizes, added at creation and later.
> * Fairly basic memory usage of memory added.  Seems to function as normal.
> * NUMA setup with virtio-mem-pci devices on each node.
> * Simple migration test.
> 
> Related kernel patch just enables the Kconfig item for ARM64 as an
> alternative to x86 in drivers/virtio/Kconfig
> 
> The original patches from David Hildenbrand stated that he thought it should
> work for ARM64 but it wasn't enabled in the kernel [1]
> It appears he was correct and everything 'just works'.
> 
> The build system related stuff is intended to ensure virtio-mem support is
> not built for arm32 (build will fail due no defined block size).
> If there is a more elegant way to do this, please point me in the right
> direction.

You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
and the "issue" with 64k base pages - 512MB granularity. Similar to the 
question from Auger, have you tried running arm64 with differing page 
sizes in host/guest?

With recent kernels, you can use "memhp_default_state=online_movable" on 
the kernel cmdline to make memory unplug more likely to succeed - 
especially with 64k base pages. You just have to be sure to not hotplug 
"too much memory" to a VM.


I had my prototype living at

git@github.com:davidhildenbrand/qemu.git / virtio-mem-arm64

which looks very similar to your patch. That is good :)

[...]

>   static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>                                           DeviceState *dev, Error **errp)
>   {
> @@ -2336,6 +2389,9 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>           virt_memory_plug(hotplug_dev, dev, errp);
>       }
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> +        virt_virtio_md_pci_plug(hotplug_dev, dev, errp);
> +    }

These better all be "else if".
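
For illustration, a minimal sketch of the plug callback reworked as an
else-if chain as suggested (only the dispatch is shown; the handlers are
the ones from the patch):

    static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                            DeviceState *dev, Error **errp)
    {
        if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
            virt_memory_plug(hotplug_dev, dev, errp);
        } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
            virt_virtio_md_pci_plug(hotplug_dev, dev, errp);
        } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
            /* existing virtio-iommu handling from the patch context */
        }
    }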

>       if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
>           PCIDevice *pdev = PCI_DEVICE(dev);
>   
> @@ -2363,6 +2419,11 @@ static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
>           goto out;
>       }
>   
> +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> +        error_setg(&local_err,
> +                   "virtio-mem based memory devices cannot be unplugged.");
> +        goto out;
> +    }

This should go into virt_machine_device_unplug_request_cb() instead, no?
[...]
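
A minimal sketch of moving that rejection into the machine-level
unplug-request callback, loosely modelled on the pc.c approach Eric
mentions (the helper name is hypothetical, not the actual patch):

    static void virt_virtio_md_pci_unplug_request(HotplugHandler *hotplug_dev,
                                                  DeviceState *dev, Error **errp)
    {
        /* virtio-mem memory cannot be taken away from the guest again */
        error_setg(errp,
                   "virtio-mem based memory devices cannot be unplugged");
    }

    static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev,
                                                      DeviceState *dev,
                                                      Error **errp)
    {
        if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
            virt_dimm_unplug_request(hotplug_dev, dev, errp);
        } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
            virt_virtio_md_pci_unplug_request(hotplug_dev, dev, errp);
        } else {
            error_setg(errp, "device unplug request for unsupported type: %s",
                       object_get_typename(OBJECT(dev)));
        }
    }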
Jonathan Cameron Nov. 24, 2020, 6:11 p.m. UTC | #4
On Mon, 9 Nov 2020 20:47:09 +0100
David Hildenbrand <david@redhat.com> wrote:

+CC Eric based on similar query in other branch of the thread.

> On 05.11.20 18:43, Jonathan Cameron wrote:
> > Basically a cut and paste job from the x86 support with the exception of
> > needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> > on ARM64 in Linux is 1G.
> > 
> > Tested:
> > * In full emulation and with KVM on an arm64 server.
> > * cold and hotplug for the virtio-mem-pci device.
> > * Wide range of memory sizes, added at creation and later.
> > * Fairly basic memory usage of memory added.  Seems to function as normal.
> > * NUMA setup with virtio-mem-pci devices on each node.
> > * Simple migration test.
> > 
> > Related kernel patch just enables the Kconfig item for ARM64 as an
> > alternative to x86 in drivers/virtio/Kconfig
> > 
> > The original patches from David Hildenbrand stated that he thought it should
> > work for ARM64 but it wasn't enabled in the kernel [1]
> > It appears he was correct and everything 'just works'.
> > 
> > The build system related stuff is intended to ensure virtio-mem support is
> > not built for arm32 (build will fail due no defined block size).
> > If there is a more elegant way to do this, please point me in the right
> > direction.  
> 
> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
> question from Auger, have you tried running arm64 with differing page 
> sizes in host/guest?
> 

Hi David,

> With recent kernels, you can use "memhp_default_state=online_movable" on 
> the kernel cmdline to make memory unplug more likely to succeed - 
> especially with 64k base pages. You just have to be sure to not hotplug 
> "too much memory" to a VM.

Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
tedious without that.

As ever, other stuff got in the way, so I only just got back to looking at this.

I've not done a particularly comprehensive set of tests yet, but things seem
to 'work' with mixed page sizes.

With 64K pages in general, you run into a problem with the device block_size being
smaller than the subblock_size.  I've just added a check for that into the
virtio-mem kernel driver and have it fail to probe if that happens.  I don't
think such a setup makes any sense anyway so no loss there.  Should it make sense
to drop that restriction in the future we can deal with that then without breaking
backwards compatibility.

So the question is whether it makes sense to bother with virtio-mem support
at all on ARM64 with 64k pages, given that currently the minimum workable
block_size is 512MiB.  I guess there is an argument for virtio-mem being a
possibly more convenient interface than full memory hotplug.  Curious to hear
what people think on this?

4K guest on 64K host seems fine and no such limit is needed - though there
may be performance issues doing that.

64k guest on 4k host with 512MiB block size seems fine.

If there are any places anyone thinks need particular poking I'd appreciate a hint :)

Jonathan



> 
> 
> I had my prototype living at
> 
> git@github.com:davidhildenbrand/qemu.git / virtio-mem-arm64
> 
> which looks very similar to your patch. That is good :)
> 
> [...]
> 
> >   static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> >                                           DeviceState *dev, Error **errp)
> >   {
> > @@ -2336,6 +2389,9 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
> >       if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> >           virt_memory_plug(hotplug_dev, dev, errp);
> >       }
> > +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> > +        virt_virtio_md_pci_plug(hotplug_dev, dev, errp);
> > +    }  
> 
> These better all be "else if".
> 
> >       if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
> >           PCIDevice *pdev = PCI_DEVICE(dev);
> >   
> > @@ -2363,6 +2419,11 @@ static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
> >           goto out;
> >       }
> >   
> > +    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
> > +        error_setg(&local_err,
> > +                   "virtio-mem based memory devices cannot be unplugged.");
> > +        goto out;
> > +    }  
> 
> This should go into virt_machine_device_unplug_request_cb() instead, no?
> [...]
> 
>
David Hildenbrand Nov. 24, 2020, 7:17 p.m. UTC | #5
On 24.11.20 19:11, Jonathan Cameron wrote:
> On Mon, 9 Nov 2020 20:47:09 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
> +CC Eric based on similar query in other branch of the thread.
> 
>> On 05.11.20 18:43, Jonathan Cameron wrote:
>>> Basically a cut and paste job from the x86 support with the exception of
>>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
>>> on ARM64 in Linux is 1G.
>>>
>>> Tested:
>>> * In full emulation and with KVM on an arm64 server.
>>> * cold and hotplug for the virtio-mem-pci device.
>>> * Wide range of memory sizes, added at creation and later.
>>> * Fairly basic memory usage of memory added.  Seems to function as normal.
>>> * NUMA setup with virtio-mem-pci devices on each node.
>>> * Simple migration test.
>>>
>>> Related kernel patch just enables the Kconfig item for ARM64 as an
>>> alternative to x86 in drivers/virtio/Kconfig
>>>
>>> The original patches from David Hildenbrand stated that he thought it should
>>> work for ARM64 but it wasn't enabled in the kernel [1]
>>> It appears he was correct and everything 'just works'.
>>>
>>> The build system related stuff is intended to ensure virtio-mem support is
>>> not built for arm32 (build will fail due no defined block size).
>>> If there is a more elegant way to do this, please point me in the right
>>> direction.  
>>
>> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
>> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
>> question from Auger, have you tried running arm64 with differing page 
>> sizes in host/guest?
>>
> 
> Hi David,
> 
>> With recent kernels, you can use "memhp_default_state=online_movable" on 
>> the kernel cmdline to make memory unplug more likely to succeed - 
>> especially with 64k base pages. You just have to be sure to not hotplug 
>> "too much memory" to a VM.
> 
> Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
> tedious with out that.
> 
> As ever other stuff got in the way, so I only just got back to looking at this.
> 
> I've not done a particularly comprehensive set of tests yet, but things seem
> to 'work' with mixed page sizes.
> 
> With 64K pages in general, you run into a problem with the device block_size being
> smaller than the subblock_size.  I've just added a check for that into the

"device block size smaller than subblock size" - that's very common,
e.g.,  on x86-64.

E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
that in the future in Linux guests.

Or did you mean something else?
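
To make the numbers concrete: the guest sub-block size comes out as the page
size shifted by max(MAX_ORDER - 1, pageblock_order). A small stand-alone
illustration (not driver code; the orders below are the usual kernel defaults
and are assumptions here):

    #include <stdint.h>
    #include <stdio.h>

    /* Returns page_size << max(max_order - 1, pageblock_order). */
    static uint64_t subblock_size(uint64_t page_size, unsigned int max_order,
                                  unsigned int pageblock_order)
    {
        unsigned int order = (max_order - 1 > pageblock_order) ?
                             max_order - 1 : pageblock_order;
        return page_size << order;
    }

    int main(void)
    {
        /* x86-64, 4k pages: MAX_ORDER 11, pageblock_order 9 -> 4 MiB */
        printf("%llu MiB\n",
               (unsigned long long)(subblock_size(4096, 11, 9) >> 20));
        /* arm64, 64k pages: MAX_ORDER 14, pageblock_order 13 -> 512 MiB */
        printf("%llu MiB\n",
               (unsigned long long)(subblock_size(65536, 14, 13) >> 20));
        return 0;
    }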

> virtio-mem kernel driver and have it fail to probe if that happens.  I don't
> think such a setup makes any sense anyway so no loss there.  Should it make sense
> to drop that restriction in the future we can deal with that then without breaking
> backwards compatibility.
> 
> So the question is whether it makes sense to bother with virtio-mem support
> at all on ARM64 with 64k pages given currently the minimum workable block_size
> is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
> convenient interface than full memory HP.  Curious to hear what people think on
> this?

IMHO we really want it. For example, RHEL is always 64k. This is a
current guest limitation, to be improved in the future - either by
moving away from 512MB huge pages with 64k or by improving
alloc_contig_range().

> 
> 4K guest on 64K host seems fine and no such limit is needed - though there
> may be performance issues doing that.

Yeah, if one is lucky to get one of these 512 MiB huge pages at all :)

> 
> 64k guest on 4k host with 512MiB block size seems fine.
> 
> If there are any places anyone thinks need particular poking I'd appreciate a hint :)

If things seem to work for now, that's great :) Thanks!
Andrew Jones Nov. 25, 2020, 8:38 a.m. UTC | #6
On Tue, Nov 24, 2020 at 08:17:35PM +0100, David Hildenbrand wrote:
> On 24.11.20 19:11, Jonathan Cameron wrote:
> > On Mon, 9 Nov 2020 20:47:09 +0100
> > David Hildenbrand <david@redhat.com> wrote:
> > 
> > +CC Eric based on similar query in other branch of the thread.
> > 
> >> On 05.11.20 18:43, Jonathan Cameron wrote:
> >>> Basically a cut and paste job from the x86 support with the exception of
> >>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> >>> on ARM64 in Linux is 1G.
> >>>
> >>> Tested:
> >>> * In full emulation and with KVM on an arm64 server.
> >>> * cold and hotplug for the virtio-mem-pci device.
> >>> * Wide range of memory sizes, added at creation and later.
> >>> * Fairly basic memory usage of memory added.  Seems to function as normal.
> >>> * NUMA setup with virtio-mem-pci devices on each node.
> >>> * Simple migration test.
> >>>
> >>> Related kernel patch just enables the Kconfig item for ARM64 as an
> >>> alternative to x86 in drivers/virtio/Kconfig
> >>>
> >>> The original patches from David Hildenbrand stated that he thought it should
> >>> work for ARM64 but it wasn't enabled in the kernel [1]
> >>> It appears he was correct and everything 'just works'.
> >>>
> >>> The build system related stuff is intended to ensure virtio-mem support is
> >>> not built for arm32 (build will fail due no defined block size).
> >>> If there is a more elegant way to do this, please point me in the right
> >>> direction.  
> >>
> >> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
> >> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
> >> question from Auger, have you tried running arm64 with differing page 
> >> sizes in host/guest?
> >>
> > 
> > Hi David,
> > 
> >> With recent kernels, you can use "memhp_default_state=online_movable" on 
> >> the kernel cmdline to make memory unplug more likely to succeed - 
> >> especially with 64k base pages. You just have to be sure to not hotplug 
> >> "too much memory" to a VM.
> > 
> > Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
> > tedious with out that.
> > 
> > As ever other stuff got in the way, so I only just got back to looking at this.
> > 
> > I've not done a particularly comprehensive set of tests yet, but things seem
> > to 'work' with mixed page sizes.
> > 
> > With 64K pages in general, you run into a problem with the device block_size being
> > smaller than the subblock_size.  I've just added a check for that into the
> 
> "device block size smaller than subblock size" - that's very common,
> e.g.,  on x86-64.
> 
> E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
> that in the future in Linux guests.
> 
> Or did you mean something else?
> 
> > virtio-mem kernel driver and have it fail to probe if that happens.  I don't
> > think such a setup makes any sense anyway so no loss there.  Should it make sense
> > to drop that restriction in the future we can deal with that then without breaking
> > backwards compatibility.
> > 
> > So the question is whether it makes sense to bother with virtio-mem support
> > at all on ARM64 with 64k pages given currently the minimum workable block_size
> > is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
> > convenient interface than full memory HP.  Curious to hear what people think on
> > this?
> 
> IMHO we really want it. For example, RHEL is always 64k. This is a
> current guest limitation, to be improved in the future - either by
> moving away from 512MB huge pages with 64k or by improving
> alloc_contig_range().

Even with 64k pages you may be able to have 2MB huge pages by setting
default_hugepagesz=2M on the kernel command line.

Thanks,
drew

> 
> > 
> > 4K guest on 64K host seems fine and no such limit is needed - though there
> > may be performance issues doing that.
> 
> Yeah, if one is lucky to get one of these 512 MiB huge pages at all :)
> 
> > 
> > 64k guest on 4k host with 512MiB block size seems fine.
> > 
> > If there are any places anyone thinks need particular poking I'd appreciate a hint :)
> 
> If things seem to work for now, that's great :) Thanks!
> 
> -- 
> Thanks,
> 
> David / dhildenb
> 
>
David Hildenbrand Nov. 25, 2020, 8:45 a.m. UTC | #7
On 25.11.20 09:38, Andrew Jones wrote:
> On Tue, Nov 24, 2020 at 08:17:35PM +0100, David Hildenbrand wrote:
>> On 24.11.20 19:11, Jonathan Cameron wrote:
>>> On Mon, 9 Nov 2020 20:47:09 +0100
>>> David Hildenbrand <david@redhat.com> wrote:
>>>
>>> +CC Eric based on similar query in other branch of the thread.
>>>
>>>> On 05.11.20 18:43, Jonathan Cameron wrote:
>>>>> Basically a cut and paste job from the x86 support with the exception of
>>>>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
>>>>> on ARM64 in Linux is 1G.
>>>>>
>>>>> Tested:
>>>>> * In full emulation and with KVM on an arm64 server.
>>>>> * cold and hotplug for the virtio-mem-pci device.
>>>>> * Wide range of memory sizes, added at creation and later.
>>>>> * Fairly basic memory usage of memory added.  Seems to function as normal.
>>>>> * NUMA setup with virtio-mem-pci devices on each node.
>>>>> * Simple migration test.
>>>>>
>>>>> Related kernel patch just enables the Kconfig item for ARM64 as an
>>>>> alternative to x86 in drivers/virtio/Kconfig
>>>>>
>>>>> The original patches from David Hildenbrand stated that he thought it should
>>>>> work for ARM64 but it wasn't enabled in the kernel [1]
>>>>> It appears he was correct and everything 'just works'.
>>>>>
>>>>> The build system related stuff is intended to ensure virtio-mem support is
>>>>> not built for arm32 (build will fail due no defined block size).
>>>>> If there is a more elegant way to do this, please point me in the right
>>>>> direction.  
>>>>
>>>> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
>>>> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
>>>> question from Auger, have you tried running arm64 with differing page 
>>>> sizes in host/guest?
>>>>
>>>
>>> Hi David,
>>>
>>>> With recent kernels, you can use "memhp_default_state=online_movable" on 
>>>> the kernel cmdline to make memory unplug more likely to succeed - 
>>>> especially with 64k base pages. You just have to be sure to not hotplug 
>>>> "too much memory" to a VM.
>>>
>>> Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
>>> tedious with out that.
>>>
>>> As ever other stuff got in the way, so I only just got back to looking at this.
>>>
>>> I've not done a particularly comprehensive set of tests yet, but things seem
>>> to 'work' with mixed page sizes.
>>>
>>> With 64K pages in general, you run into a problem with the device block_size being
>>> smaller than the subblock_size.  I've just added a check for that into the
>>
>> "device block size smaller than subblock size" - that's very common,
>> e.g.,  on x86-64.
>>
>> E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
>> that in the future in Linux guests.
>>
>> Or did you mean something else?
>>
>>> virtio-mem kernel driver and have it fail to probe if that happens.  I don't
>>> think such a setup makes any sense anyway so no loss there.  Should it make sense
>>> to drop that restriction in the future we can deal with that then without breaking
>>> backwards compatibility.
>>>
>>> So the question is whether it makes sense to bother with virtio-mem support
>>> at all on ARM64 with 64k pages given currently the minimum workable block_size
>>> is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
>>> convenient interface than full memory HP.  Curious to hear what people think on
>>> this?
>>
>> IMHO we really want it. For example, RHEL is always 64k. This is a
>> current guest limitation, to be improved in the future - either by
>> moving away from 512MB huge pages with 64k or by improving
>> alloc_contig_range().
> 
> Even with 64k pages you may be able to have 2MB huge pages by setting
> default_hugepagesz=2M on the kernel command line.

Yes, but not for THP, right? Last time I checked that move was not
performed yet - resulting in MAX_ORDER/pageblock_order in Linux
corresponding to 512 MB.

> 
> Thanks,
> drew
Andrew Jones Nov. 25, 2020, 10:47 a.m. UTC | #8
On Wed, Nov 25, 2020 at 09:45:19AM +0100, David Hildenbrand wrote:
> On 25.11.20 09:38, Andrew Jones wrote:
> > On Tue, Nov 24, 2020 at 08:17:35PM +0100, David Hildenbrand wrote:
> >> On 24.11.20 19:11, Jonathan Cameron wrote:
> >>> On Mon, 9 Nov 2020 20:47:09 +0100
> >>> David Hildenbrand <david@redhat.com> wrote:
> >>>
> >>> +CC Eric based on similar query in other branch of the thread.
> >>>
> >>>> On 05.11.20 18:43, Jonathan Cameron wrote:
> >>>>> Basically a cut and paste job from the x86 support with the exception of
> >>>>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> >>>>> on ARM64 in Linux is 1G.
> >>>>>
> >>>>> Tested:
> >>>>> * In full emulation and with KVM on an arm64 server.
> >>>>> * cold and hotplug for the virtio-mem-pci device.
> >>>>> * Wide range of memory sizes, added at creation and later.
> >>>>> * Fairly basic memory usage of memory added.  Seems to function as normal.
> >>>>> * NUMA setup with virtio-mem-pci devices on each node.
> >>>>> * Simple migration test.
> >>>>>
> >>>>> Related kernel patch just enables the Kconfig item for ARM64 as an
> >>>>> alternative to x86 in drivers/virtio/Kconfig
> >>>>>
> >>>>> The original patches from David Hildenbrand stated that he thought it should
> >>>>> work for ARM64 but it wasn't enabled in the kernel [1]
> >>>>> It appears he was correct and everything 'just works'.
> >>>>>
> >>>>> The build system related stuff is intended to ensure virtio-mem support is
> >>>>> not built for arm32 (build will fail due no defined block size).
> >>>>> If there is a more elegant way to do this, please point me in the right
> >>>>> direction.  
> >>>>
> >>>> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
> >>>> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
> >>>> question from Auger, have you tried running arm64 with differing page 
> >>>> sizes in host/guest?
> >>>>
> >>>
> >>> Hi David,
> >>>
> >>>> With recent kernels, you can use "memhp_default_state=online_movable" on 
> >>>> the kernel cmdline to make memory unplug more likely to succeed - 
> >>>> especially with 64k base pages. You just have to be sure to not hotplug 
> >>>> "too much memory" to a VM.
> >>>
> >>> Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
> >>> tedious with out that.
> >>>
> >>> As ever other stuff got in the way, so I only just got back to looking at this.
> >>>
> >>> I've not done a particularly comprehensive set of tests yet, but things seem
> >>> to 'work' with mixed page sizes.
> >>>
> >>> With 64K pages in general, you run into a problem with the device block_size being
> >>> smaller than the subblock_size.  I've just added a check for that into the
> >>
> >> "device block size smaller than subblock size" - that's very common,
> >> e.g.,  on x86-64.
> >>
> >> E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
> >> that in the future in Linux guests.
> >>
> >> Or did you mean something else?
> >>
> >>> virtio-mem kernel driver and have it fail to probe if that happens.  I don't
> >>> think such a setup makes any sense anyway so no loss there.  Should it make sense
> >>> to drop that restriction in the future we can deal with that then without breaking
> >>> backwards compatibility.
> >>>
> >>> So the question is whether it makes sense to bother with virtio-mem support
> >>> at all on ARM64 with 64k pages given currently the minimum workable block_size
> >>> is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
> >>> convenient interface than full memory HP.  Curious to hear what people think on
> >>> this?
> >>
> >> IMHO we really want it. For example, RHEL is always 64k. This is a
> >> current guest limitation, to be improved in the future - either by
> >> moving away from 512MB huge pages with 64k or by improving
> >> alloc_contig_range().
> > 
> > Even with 64k pages you may be able to have 2MB huge pages by setting
> > default_hugepagesz=2M on the kernel command line.
> 
> Yes, but not for THP, right? Last time I checked that move was not
> performed yet - resulting in MAX_ORDER/pageblock_order in Linux
> corresponding to 512 MB.
>

Yes, I believe you're correct. At least on the machine I've booted with
default_hugepagesz=2M, I see

 $ cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 536870912

(I'm not running a latest mainline kernel though.)

Thanks,
drew
David Hildenbrand Nov. 25, 2020, 10:57 a.m. UTC | #9
On 25.11.20 11:47, Andrew Jones wrote:
> On Wed, Nov 25, 2020 at 09:45:19AM +0100, David Hildenbrand wrote:
>> On 25.11.20 09:38, Andrew Jones wrote:
>>> On Tue, Nov 24, 2020 at 08:17:35PM +0100, David Hildenbrand wrote:
>>>> On 24.11.20 19:11, Jonathan Cameron wrote:
>>>>> On Mon, 9 Nov 2020 20:47:09 +0100
>>>>> David Hildenbrand <david@redhat.com> wrote:
>>>>>
>>>>> +CC Eric based on similar query in other branch of the thread.
>>>>>
>>>>>> On 05.11.20 18:43, Jonathan Cameron wrote:
>>>>>>> Basically a cut and paste job from the x86 support with the exception of
>>>>>>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
>>>>>>> on ARM64 in Linux is 1G.
>>>>>>>
>>>>>>> Tested:
>>>>>>> * In full emulation and with KVM on an arm64 server.
>>>>>>> * cold and hotplug for the virtio-mem-pci device.
>>>>>>> * Wide range of memory sizes, added at creation and later.
>>>>>>> * Fairly basic memory usage of memory added.  Seems to function as normal.
>>>>>>> * NUMA setup with virtio-mem-pci devices on each node.
>>>>>>> * Simple migration test.
>>>>>>>
>>>>>>> Related kernel patch just enables the Kconfig item for ARM64 as an
>>>>>>> alternative to x86 in drivers/virtio/Kconfig
>>>>>>>
>>>>>>> The original patches from David Hildenbrand stated that he thought it should
>>>>>>> work for ARM64 but it wasn't enabled in the kernel [1]
>>>>>>> It appears he was correct and everything 'just works'.
>>>>>>>
>>>>>>> The build system related stuff is intended to ensure virtio-mem support is
>>>>>>> not built for arm32 (build will fail due no defined block size).
>>>>>>> If there is a more elegant way to do this, please point me in the right
>>>>>>> direction.  
>>>>>>
>>>>>> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
>>>>>> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
>>>>>> question from Auger, have you tried running arm64 with differing page 
>>>>>> sizes in host/guest?
>>>>>>
>>>>>
>>>>> Hi David,
>>>>>
>>>>>> With recent kernels, you can use "memhp_default_state=online_movable" on 
>>>>>> the kernel cmdline to make memory unplug more likely to succeed - 
>>>>>> especially with 64k base pages. You just have to be sure to not hotplug 
>>>>>> "too much memory" to a VM.
>>>>>
>>>>> Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
>>>>> tedious with out that.
>>>>>
>>>>> As ever other stuff got in the way, so I only just got back to looking at this.
>>>>>
>>>>> I've not done a particularly comprehensive set of tests yet, but things seem
>>>>> to 'work' with mixed page sizes.
>>>>>
>>>>> With 64K pages in general, you run into a problem with the device block_size being
>>>>> smaller than the subblock_size.  I've just added a check for that into the
>>>>
>>>> "device block size smaller than subblock size" - that's very common,
>>>> e.g.,  on x86-64.
>>>>
>>>> E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
>>>> that in the future in Linux guests.
>>>>
>>>> Or did you mean something else?
>>>>
>>>>> virtio-mem kernel driver and have it fail to probe if that happens.  I don't
>>>>> think such a setup makes any sense anyway so no loss there.  Should it make sense
>>>>> to drop that restriction in the future we can deal with that then without breaking
>>>>> backwards compatibility.
>>>>>
>>>>> So the question is whether it makes sense to bother with virtio-mem support
>>>>> at all on ARM64 with 64k pages given currently the minimum workable block_size
>>>>> is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
>>>>> convenient interface than full memory HP.  Curious to hear what people think on
>>>>> this?
>>>>
>>>> IMHO we really want it. For example, RHEL is always 64k. This is a
>>>> current guest limitation, to be improved in the future - either by
>>>> moving away from 512MB huge pages with 64k or by improving
>>>> alloc_contig_range().
>>>
>>> Even with 64k pages you may be able to have 2MB huge pages by setting
>>> default_hugepagesz=2M on the kernel command line.
>>
>> Yes, but not for THP, right? Last time I checked that move was not
>> performed yet - resulting in MAX_ORDER/pageblock_order in Linux
>> corresponding to 512 MB.
>>
> 
> Yes, I believe you're correct. At least on the machine I've booted with
> default_hugepagesz=2M, I see
> 
>  $ cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
>  536870912
> 
> (I'm not running a latest mainline kernel though.)

I remember some upstream discussions where people raised that switching
to 2 MB THP might be possible (implemented via cont bits in the
pagetables - similar to 2MB huge pages you mentioned). 512 MB really
sounds more like gigantic pages after all.
Jonathan Cameron Nov. 25, 2020, 2:56 p.m. UTC | #10
On Tue, 24 Nov 2020 20:17:35 +0100
David Hildenbrand <david@redhat.com> wrote:

> On 24.11.20 19:11, Jonathan Cameron wrote:
> > On Mon, 9 Nov 2020 20:47:09 +0100
> > David Hildenbrand <david@redhat.com> wrote:
> > 
> > +CC Eric based on similar query in other branch of the thread.
> >   
> >> On 05.11.20 18:43, Jonathan Cameron wrote:  
> >>> Basically a cut and paste job from the x86 support with the exception of
> >>> needing a larger block size as the Memory Block Size (MIN_SECTION_SIZE)
> >>> on ARM64 in Linux is 1G.
> >>>
> >>> Tested:
> >>> * In full emulation and with KVM on an arm64 server.
> >>> * cold and hotplug for the virtio-mem-pci device.
> >>> * Wide range of memory sizes, added at creation and later.
> >>> * Fairly basic memory usage of memory added.  Seems to function as normal.
> >>> * NUMA setup with virtio-mem-pci devices on each node.
> >>> * Simple migration test.
> >>>
> >>> Related kernel patch just enables the Kconfig item for ARM64 as an
> >>> alternative to x86 in drivers/virtio/Kconfig
> >>>
> >>> The original patches from David Hildenbrand stated that he thought it should
> >>> work for ARM64 but it wasn't enabled in the kernel [1]
> >>> It appears he was correct and everything 'just works'.
> >>>
> >>> The build system related stuff is intended to ensure virtio-mem support is
> >>> not built for arm32 (build will fail due no defined block size).
> >>> If there is a more elegant way to do this, please point me in the right
> >>> direction.    
> >>
> >> You might be aware of https://virtio-mem.gitlab.io/developer-guide.html 
> >> and the "issue" with 64k base pages - 512MB granularity. Similar as the 
> >> question from Auger, have you tried running arm64 with differing page 
> >> sizes in host/guest?
> >>  
> > 
> > Hi David,
> >   
> >> With recent kernels, you can use "memhp_default_state=online_movable" on 
> >> the kernel cmdline to make memory unplug more likely to succeed - 
> >> especially with 64k base pages. You just have to be sure to not hotplug 
> >> "too much memory" to a VM.  
> > 
> > Thanks for the pointer - that definitely simplifies testing.  Was getting a bit
> > tedious with out that.
> > 
> > As ever other stuff got in the way, so I only just got back to looking at this.
> > 
> > I've not done a particularly comprehensive set of tests yet, but things seem
> > to 'work' with mixed page sizes.
> > 
> > With 64K pages in general, you run into a problem with the device block_size being
> > smaller than the subblock_size.  I've just added a check for that into the  
> 
> "device block size smaller than subblock size" - that's very common,
> e.g.,  on x86-64.
> 
> E.g., device_block_size is 2MiB, subblock size 4MiB - until we improve
> that in the future in Linux guests.

Ah. I'd missed that quirk around MAX_ORDER.  It's also true of ARM64 with
4k pages.  As you can probably guess I'd forgotten to recompile my 4k test
kernel after adding that particular check. :(

Ah well. Given we are already in a situation where adding 2MiB doesn't actually
do anything useful, I guess it's not really a problem to merrily let the host
add less than the guest can make use of.  So we allow adding any multiple of
2MiB but reality is the guest isn't going to use anything other than 512MiB
chunks.

> 
> Or did you mean something else?
> 
> > virtio-mem kernel driver and have it fail to probe if that happens.  I don't
> > think such a setup makes any sense anyway so no loss there.  Should it make sense
> > to drop that restriction in the future we can deal with that then without breaking
> > backwards compatibility.
> > 
> > So the question is whether it makes sense to bother with virtio-mem support
> > at all on ARM64 with 64k pages given currently the minimum workable block_size
> > is 512MiB?  I guess there is an argument of virtio-mem being a possibly more
> > convenient interface than full memory HP.  Curious to hear what people think on
> > this?  
> 
> IMHO we really want it. For example, RHEL is always 64k. This is a
> current guest limitation, to be improved in the future - either by
> moving away from 512MB huge pages with 64k or by improving
> alloc_contig_range().

Sure.  Would be great if we do one day support 2MiB on 64k.

> 
> > 
> > 4K guest on 64K host seems fine and no such limit is needed - though there
> > may be performance issues doing that.  
> 
> Yeah, if one is lucky to get one of these 512 MiB huge pages at all :)

Not too hard on my 1TB test system that's running nothing much else, but agreed it
won't be trivial more generally.

> 
> > 
> > 64k guest on 4k host with 512MiB block size seems fine.
> > 
> > If there are any places anyone thinks need particular poking I'd appreciate a hint :)  
> 
> If things seem to work for now, that's great :) Thanks!
> 
Cool.  I'll run a few more comprehensive tests then send out the
trivial patch to enable the kernel option + v2 of the qemu support.

Jonathan
David Hildenbrand Nov. 25, 2020, 3:04 p.m. UTC | #11
> Ah. I'd missed that quirk around MAX_ORDER.  It's also true of ARM64 with
> 4k pages.  As you can probably guess I'd forgotten to recompile my 4k test
> kernel after adding that particular check. :(
> 
> Ah well. Given we are already in a situation where adding 2MiB doesn't actually
> do anything useful, I guess it's not really a problem to merrily let the host
> add less than the guest can make use of.  So we allow adding any multiple of
> 2MiB but reality is the guest isn't going to use anything other than 512MiB
> chunks.

Right, and the host can observe the change not happening when not
aligned to 512 MB. There are plans for a virtio-mem extension for the
guest to present a status - e.g., why the requested size cannot be
achieved (requested size not aligned, usable region too small,
ENOMEM/EBUSY when unplugging, ...).

[...]

>>>
>>> 4K guest on 64K host seems fine and no such limit is needed - though there
>>> may be performance issues doing that.  
>>
>> Yeah, if one is lucky to get one of these 512 MiB huge pages at all :)
> 
> Not too hard on my 1TB test system that's running nothing much else, but agreed it
> won't be trivial more generally.

Hehe, right ! (... and here I am, testing with 64GB machines ... :) )

It's more of an issue in the guest to get 512 MB without ZONE_MOVABLE to
unplug ... especially with smaller VMs.

> 
>>
>>>
>>> 64k guest on 4k host with 512MiB block size seems fine.
>>>
>>> If there are any places anyone thinks need particular poking I'd appreciate a hint :)  
>>
>> If things seem to work for now, that's great :) Thanks!
>>
> Cool.  I'll run a few more comprehensive tests then send out the
> trivial patch to enable the kernel option + v2 of the qemu support.

Perfect, thanks!
David Hildenbrand Nov. 25, 2020, 3:54 p.m. UTC | #12
>>>>
>>>> 64k guest on 4k host with 512MiB block size seems fine.
>>>>
>>>> If there are any places anyone thinks need particular poking I'd appreciate a hint :)  
>>>
>>> If things seem to work for now, that's great :) Thanks!
>>>
>> Cool.  I'll run a few more comprehensive tests then send out the
>> trivial patch to enable the kernel option + v2 of the qemu support.
> 
> Perfect, thanks!

Oh, btw, I have no idea what the state of vfio-pci + QEMU on arm64 is.
In case it's supposed to work, you could give

https://lkml.kernel.org/r/20201119153918.120976-1-david@redhat.com

a try, to see what we're missing.

I added a short virtio-pci guide to

https://virtio-mem.gitlab.io/user-guide/user-guide-qemu.html
Jonathan Cameron Nov. 25, 2020, 4:11 p.m. UTC | #13
On Wed, 25 Nov 2020 16:54:53 +0100
David Hildenbrand <david@redhat.com> wrote:

> >>>>
> >>>> 64k guest on 4k host with 512MiB block size seems fine.
> >>>>
> >>>> If there are any places anyone thinks need particular poking I'd appreciate a hint :)    
> >>>
> >>> If things seem to work for now, that's great :) Thanks!
> >>>  
> >> Cool.  I'll run a few more comprehensive tests then send out the
> >> trivial patch to enable the kernel option + v2 of the qemu support.  
> > 
> > Perfect, thanks!  
> 
> Oh, btw, I have no idea what the state of vfio-pci + QEMU on arm64 is.
> In case it's supposed to work, you could give
> 
> https://lkml.kernel.org/r/20201119153918.120976-1-david@redhat.com
> 
> to see what we're missing.

vfio-pci works in general (and we use it a lot), so sure I'll give
this a test run.

> 
> I added a short virtio-pci guide to
> 
> https://virtio-mem.gitlab.io/user-guide/user-guide-qemu.html
> 

Thanks,

Jonathan
David Hildenbrand Nov. 25, 2020, 4:32 p.m. UTC | #14
On 25.11.20 17:11, Jonathan Cameron wrote:
> On Wed, 25 Nov 2020 16:54:53 +0100
> David Hildenbrand <david@redhat.com> wrote:
> 
>>>>>>
>>>>>> 64k guest on 4k host with 512MiB block size seems fine.
>>>>>>
>>>>>> If there are any places anyone thinks need particular poking I'd appreciate a hint :)    
>>>>>
>>>>> If things seem to work for now, that's great :) Thanks!
>>>>>  
>>>> Cool.  I'll run a few more comprehensive tests then send out the
>>>> trivial patch to enable the kernel option + v2 of the qemu support.  
>>>
>>> Perfect, thanks!  
>>
>> Oh, btw, I have no idea what the state of vfio-pci + QEMU on arm64 is.
>> In case it's supposed to work, you could give
>>
>> https://lkml.kernel.org/r/20201119153918.120976-1-david@redhat.com
>>
>> a try to see what we're missing.
> 
> vfio-pci works in general (and we use it a lot), so sure I'll give
> this a test run.

Cool.

In case you get it to run, please test with both "online_kernel" and
"online_movable" in the guest, and small boot memory (e.g., 2 GiB).

For example, on x86-64 I got my vfio-pci provided GPUs to consume
virtio-mem memory easily when starting with 2-4 GiB boot memory and
using "online_kernel". (I verified that when not creating the mappings,
IO errors can be observed and graphics are messed up).
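
For reference, the guest-side onlining policy for such a test can usually be
switched via the memory-hotplug sysfs interface (a sketch; parameter spellings
may differ slightly between kernel versions):

  # Online newly added memory blocks into ZONE_NORMAL ("kernel"), allowing
  # kernel/driver allocations to land in the hotplugged memory:
  echo online_kernel > /sys/devices/system/memory/auto_online_blocks

  # ... or into ZONE_MOVABLE, which keeps the added memory unpluggable:
  echo online_movable > /sys/devices/system/memory/auto_online_blocks

  # The default can also be set on the guest kernel command line, e.g.:
  #   memhp_default_state=online_movable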
Michael S. Tsirkin Dec. 2, 2020, 10:02 a.m. UTC | #15
On Wed, Nov 25, 2020 at 02:56:59PM +0000, Jonathan Cameron wrote:
> Cool.  I'll run a few more comprehensive tests then send out the
> trivial patch to enable the kernel option + v2 of the qemu support.

IIUC there will be another version of this patch, right?
Jonathan Cameron Dec. 2, 2020, 10:45 a.m. UTC | #16
On Wed, 2 Dec 2020 05:02:57 -0500
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Nov 25, 2020 at 02:56:59PM +0000, Jonathan Cameron wrote:
> > Cool.  I'll run a few more comprehensive tests then send out the
> > trivial patch to enable the kernel option + v2 of the qemu support.  
> 
> IIUC there will be another version of this patch, right?
> 

Yes.  Busy period, so it might be a little while yet before testing is
complete and v2 goes out.

Jonathan
diff mbox series

Patch

diff --git a/default-configs/devices/aarch64-softmmu.mak b/default-configs/devices/aarch64-softmmu.mak
index 958b1e08e4..31d6128a29 100644
--- a/default-configs/devices/aarch64-softmmu.mak
+++ b/default-configs/devices/aarch64-softmmu.mak
@@ -6,3 +6,4 @@  include arm-softmmu.mak
 CONFIG_XLNX_ZYNQMP_ARM=y
 CONFIG_XLNX_VERSAL=y
 CONFIG_SBSA_REF=y
+CONFIG_ARCH_VIRTIO_MEM_SUPPORTED=y
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index fdf4464b94..eeae77eee9 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -20,6 +20,7 @@  config ARM_VIRT
     select PLATFORM_BUS
     select SMBIOS
     select VIRTIO_MMIO
+    select VIRTIO_MEM_SUPPORTED if ARCH_VIRTIO_MEM_SUPPORTED
     select ACPI_PCI
     select MEM_DEVICE
     select DIMM
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8abb385d4e..6c96d71106 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -73,9 +73,11 @@ 
 #include "hw/acpi/acpi.h"
 #include "target/arm/internals.h"
 #include "hw/mem/pc-dimm.h"
+#include "hw/mem/memory-device.h"
 #include "hw/mem/nvdimm.h"
 #include "hw/acpi/generic_event_device.h"
 #include "hw/virtio/virtio-iommu.h"
+#include "hw/virtio/virtio-mem-pci.h"
 #include "hw/char/pl011.h"
 #include "qemu/guest-random.h"
 
@@ -2286,6 +2288,34 @@  static void virt_memory_plug(HotplugHandler *hotplug_dev,
                          dev, &error_abort);
 }
 
+static void virt_virtio_md_pci_pre_plug(HotplugHandler *hotplug_dev,
+                                      DeviceState *dev, Error **errp)
+{
+    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
+    Error *local_err = NULL;
+
+    if (!hotplug_dev2 && dev->hotplugged) {
+        /*
+         * Without a bus hotplug handler, we cannot control the plug/unplug
+         * order. We should never reach this point when hotplugging,
+         * however, better add a safety net.
+         */
+        error_setg(errp, "hotplug of virtio-mem based memory devices not"
+                   " supported on this bus.");
+        return;
+    }
+    /*
+     * First, see if we can plug this memory device at all. If that
+     * succeeds, branch off to the actual hotplug handler.
+     */
+    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
+                           &local_err);
+    if (!local_err && hotplug_dev2) {
+        hotplug_handler_pre_plug(hotplug_dev2, dev, &local_err);
+    }
+    error_propagate(errp, local_err);
+}
+
 static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
                                             DeviceState *dev, Error **errp)
 {
@@ -2293,6 +2323,8 @@  static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
         virt_memory_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
+        virt_virtio_md_pci_pre_plug(hotplug_dev, dev, errp);
     } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
         hwaddr db_start = 0, db_end = 0;
         char *resv_prop_str;
@@ -2322,6 +2354,27 @@  static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
     }
 }
 
+static void virt_virtio_md_pci_plug(HotplugHandler *hotplug_dev,
+                                  DeviceState *dev, Error **errp)
+{
+    HotplugHandler *hotplug_dev2 = qdev_get_bus_hotplug_handler(dev);
+    Error *local_err = NULL;
+
+    /*
+     * Plug the memory device first and then branch off to the actual
+     * hotplug handler. If that one fails, we can easily undo the memory
+     * device bits.
+     */
+    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+    if (hotplug_dev2) {
+        hotplug_handler_plug(hotplug_dev2, dev, &local_err);
+        if (local_err) {
+            memory_device_unplug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
+        }
+    }
+    error_propagate(errp, local_err);
+}
+
 static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
                                         DeviceState *dev, Error **errp)
 {
@@ -2336,6 +2389,9 @@  static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
         virt_memory_plug(hotplug_dev, dev, errp);
     }
+    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
+        virt_virtio_md_pci_plug(hotplug_dev, dev, errp);
+    }
     if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
         PCIDevice *pdev = PCI_DEVICE(dev);
 
@@ -2363,6 +2419,11 @@  static void virt_dimm_unplug_request(HotplugHandler *hotplug_dev,
         goto out;
     }
 
+    if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
+        error_setg(&local_err,
+                   "virtio-mem based memory devices cannot be unplugged.");
+        goto out;
+    }
     hotplug_handler_unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev,
                                    &local_err);
 out:
@@ -2413,7 +2474,8 @@  static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
                                                         DeviceState *dev)
 {
     if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE) ||
-       (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM))) {
+        object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI)) {
         return HOTPLUG_HANDLER(machine);
     }
     if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 0eda25c4e1..00dbf2939e 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -48,6 +48,10 @@  config VIRTIO_PMEM
     depends on VIRTIO_PMEM_SUPPORTED
     select MEM_DEVICE
 
+config ARCH_VIRTIO_MEM_SUPPORTED
+    bool
+    default n
+
 config VIRTIO_MEM_SUPPORTED
     bool
 
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 7c8ca9f28b..16f9de6ab6 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -53,6 +53,8 @@ 
  */
 #if defined(TARGET_X86_64) || defined(TARGET_I386)
 #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB))
+#elif defined(TARGET_AARCH64)
+#define VIRTIO_MEM_USABLE_EXTENT (2 * (1024 * MiB))
 #else
 #error VIRTIO_MEM_USABLE_EXTENT not defined
 #endif