diff mbox series

[v1,5/6] hw/arm/virt: Enable backup bitmap for dirty ring

Message ID 20230213003925.40158-6-gshan@redhat.com (mailing list archive)
State New, archived
Headers show
Series hw/arm/virt: Support dirty ring | expand

Commit Message

Gavin Shan Feb. 13, 2023, 12:39 a.m. UTC
When KVM device "kvm-arm-gicv3" or "arm-its-kvm" is used, we have to
enable the backup bitmap for the dirty ring. Otherwise, the migration
will fail because those two devices are using the backup bitmap to track
dirty guest memory, corresponding to various hardware tables.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
---
 hw/arm/virt.c        | 26 ++++++++++++++++++++++++++
 target/arm/kvm64.c   | 25 +++++++++++++++++++++++++
 target/arm/kvm_arm.h | 12 ++++++++++++
 3 files changed, 63 insertions(+)

Comments

Peter Maydell Feb. 21, 2023, 4:27 p.m. UTC | #1
On Mon, 13 Feb 2023 at 00:40, Gavin Shan <gshan@redhat.com> wrote:
>
> When KVM device "kvm-arm-gicv3" or "arm-its-kvm" is used, we have to
> enable the backup bitmap for the dirty ring. Otherwise, the migration
> will fail because those two devices are using the backup bitmap to track
> dirty guest memory, corresponding to various hardware tables.
>
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> Reviewed-by: Juan Quintela <quintela@redhat.com>
> ---
>  hw/arm/virt.c        | 26 ++++++++++++++++++++++++++
>  target/arm/kvm64.c   | 25 +++++++++++++++++++++++++
>  target/arm/kvm_arm.h | 12 ++++++++++++
>  3 files changed, 63 insertions(+)
>
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 75f28947de..ea9bd98a65 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2024,6 +2024,8 @@ static void machvirt_init(MachineState *machine)
>      VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
>      MachineClass *mc = MACHINE_GET_CLASS(machine);
>      const CPUArchIdList *possible_cpus;
> +    const char *gictype = NULL;
> +    const char *itsclass = NULL;
>      MemoryRegion *sysmem = get_system_memory();
>      MemoryRegion *secure_sysmem = NULL;
>      MemoryRegion *tag_sysmem = NULL;
> @@ -2071,6 +2073,30 @@ static void machvirt_init(MachineState *machine)
>       */
>      finalize_gic_version(vms);
>
> +    /*
> +     * When "kvm-arm-gicv3" or "arm-its-kvm" is used, the backup dirty
> +     * bitmap has to be enabled for KVM dirty ring, before any memory
> +     * slot is added. Otherwise, the migration will fail with the dirty
> +     * ring.
> +     */
> +    if (kvm_enabled()) {
> +        if (vms->gic_version != VIRT_GIC_VERSION_2) {
> +            gictype = gicv3_class_name();
> +        }
> +
> +        if (vms->gic_version != VIRT_GIC_VERSION_2 && vms->its) {
> +            itsclass = its_class_name();
> +        }
> +
> +        if (((gictype && !strcmp(gictype, "kvm-arm-gicv3")) ||
> +             (itsclass && !strcmp(itsclass, "arm-its-kvm"))) &&
> +            !kvm_arm_enable_dirty_ring_with_bitmap()) {
> +            error_report("Failed to enable the backup bitmap for "
> +                         "KVM dirty ring");
> +            exit(1);
> +        }
> +    }

Why does this need to be board-specific code? Is there
some way we can just do the right thing automatically?
Why does the GIC/ITS matter?

The kernel should already know whether we have asked it
to do something that needs this extra extension, so
I think we ought to be able in the generic "enable the
dirty ring" code say "if the kernel says we need this
extra thing, also enable this extra thing". Or if that's
too early, we can do the extra part in a generic hook a
bit later.

In the future there might be other things, presumably,
that need the backup bitmap, so it would be more future
proof not to need to also change QEMU to add extra
logic checks that duplicate the logic the kernel already has.

thanks
-- PMM
Gavin Shan Feb. 22, 2023, 4:35 a.m. UTC | #2
On 2/22/23 3:27 AM, Peter Maydell wrote:
> On Mon, 13 Feb 2023 at 00:40, Gavin Shan <gshan@redhat.com> wrote:
>>
>> When KVM device "kvm-arm-gicv3" or "arm-its-kvm" is used, we have to
>> enable the backup bitmap for the dirty ring. Otherwise, the migration
>> will fail because those two devices are using the backup bitmap to track
>> dirty guest memory, corresponding to various hardware tables.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> Reviewed-by: Juan Quintela <quintela@redhat.com>
>> ---
>>   hw/arm/virt.c        | 26 ++++++++++++++++++++++++++
>>   target/arm/kvm64.c   | 25 +++++++++++++++++++++++++
>>   target/arm/kvm_arm.h | 12 ++++++++++++
>>   3 files changed, 63 insertions(+)
>>
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 75f28947de..ea9bd98a65 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -2024,6 +2024,8 @@ static void machvirt_init(MachineState *machine)
>>       VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
>>       MachineClass *mc = MACHINE_GET_CLASS(machine);
>>       const CPUArchIdList *possible_cpus;
>> +    const char *gictype = NULL;
>> +    const char *itsclass = NULL;
>>       MemoryRegion *sysmem = get_system_memory();
>>       MemoryRegion *secure_sysmem = NULL;
>>       MemoryRegion *tag_sysmem = NULL;
>> @@ -2071,6 +2073,30 @@ static void machvirt_init(MachineState *machine)
>>        */
>>       finalize_gic_version(vms);
>>
>> +    /*
>> +     * When "kvm-arm-gicv3" or "arm-its-kvm" is used, the backup dirty
>> +     * bitmap has to be enabled for KVM dirty ring, before any memory
>> +     * slot is added. Otherwise, the migration will fail with the dirty
>> +     * ring.
>> +     */
>> +    if (kvm_enabled()) {
>> +        if (vms->gic_version != VIRT_GIC_VERSION_2) {
>> +            gictype = gicv3_class_name();
>> +        }
>> +
>> +        if (vms->gic_version != VIRT_GIC_VERSION_2 && vms->its) {
>> +            itsclass = its_class_name();
>> +        }
>> +
>> +        if (((gictype && !strcmp(gictype, "kvm-arm-gicv3")) ||
>> +             (itsclass && !strcmp(itsclass, "arm-its-kvm"))) &&
>> +            !kvm_arm_enable_dirty_ring_with_bitmap()) {
>> +            error_report("Failed to enable the backup bitmap for "
>> +                         "KVM dirty ring");
>> +            exit(1);
>> +        }
>> +    }
> 
> Why does this need to be board-specific code? Is there
> some way we can just do the right thing automatically?
> Why does the GIC/ITS matter?
> 
> The kernel should already know whether we have asked it
> to do something that needs this extra extension, so
> I think we ought to be able in the generic "enable the
> dirty ring" code say "if the kernel says we need this
> extra thing, also enable this extra thing". Or if that's
> too early, we can do the extra part in a generic hook a
> bit later.
> 
> In the future there might be other things, presumably,
> that need the backup bitmap, so it would be more future
> proof not to need to also change QEMU to add extra
> logic checks that duplicate the logic the kernel already has.
> 

When the dirty ring is enabled, a per-vcpu buffer is used to track the dirty pages.
The prerequisite to use the per-vcpu buffer is existing running VCPU context. There
are two cases where no running VCPU context exists and the backup bitmap extension
is needed, as we know until now: (a) save/restore GICv3 tables; (b) save/restore ITS
tables; These two cases are related to KVM device "kvm-arm-gicv3" and "arm-its-kvm",
which are only needed by virt machine at present. So we needn't the backup bitmap
extension for other boards.

The host kernel always exports the capability KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
for ARM64. The capability isn't exported for x86 because we needn't it there. The
generic path to enable the extension would be in kvm_init(). I think the extension
is enabled unconditionally if it has been exported by host kernel. It means there
will be unnecessary overhead to synchronize the backup bitmap when the aforementioned
KVM devices aren't used, but the overhead should be very small and acceptable. The
only concern is host kernel has to allocate the backup bitmap, which is totally no
use. Please let me know your thoughts, Peter.


qemu_init
   qemu_create_machine
     :
     :
   configure_accelerators
     do_configure_accelerator
       accel_init_machine
         kvm_init                            // Where the dirty ring is eanbled. Would be best
           kvm_arch_init                     // place to enable the backup bitmap extension regardless
     :                                       // of 'kvm-arm-gicv3' and 'arm-its-kvm' are used
     :
   qmp_x_exit_preconfig
     qemu_init_board
       machine_run_board_init
         machvirt_init                      // memory slots are added here and where we know the KVM devices
     :                                      // are used
     :
   accel_setup_post                         // no backend for KVM yet, xen only

Note that the capability KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP can't be enabled if
the dirty ring isn't enabled or memory slots have been added.

Thanks,
Gavin
Peter Maydell Feb. 22, 2023, 3:54 p.m. UTC | #3
On Wed, 22 Feb 2023 at 04:36, Gavin Shan <gshan@redhat.com> wrote:
>
> On 2/22/23 3:27 AM, Peter Maydell wrote:
> > Why does this need to be board-specific code? Is there
> > some way we can just do the right thing automatically?
> > Why does the GIC/ITS matter?
> >
> > The kernel should already know whether we have asked it
> > to do something that needs this extra extension, so
> > I think we ought to be able in the generic "enable the
> > dirty ring" code say "if the kernel says we need this
> > extra thing, also enable this extra thing". Or if that's
> > too early, we can do the extra part in a generic hook a
> > bit later.
> >
> > In the future there might be other things, presumably,
> > that need the backup bitmap, so it would be more future
> > proof not to need to also change QEMU to add extra
> > logic checks that duplicate the logic the kernel already has.
> >
>
> When the dirty ring is enabled, a per-vcpu buffer is used to track the dirty pages.
> The prerequisite to use the per-vcpu buffer is existing running VCPU context. There
> are two cases where no running VCPU context exists and the backup bitmap extension
> is needed, as we know until now: (a) save/restore GICv3 tables; (b) save/restore ITS
> tables; These two cases are related to KVM device "kvm-arm-gicv3" and "arm-its-kvm",
> which are only needed by virt machine at present. So we needn't the backup bitmap
> extension for other boards.

But we might have to for other boards we add later. We shouldn't
put code in per-board if it's not really board specific.

Moreover, I think "we need the backup bitmap if the kernel is
using its GICv3 or ITS implementation" is a kernel implementation
detail. It seems to me that it would be cleaner if QEMU didn't
have to hardcode "we happen to know that these are the situations
when we need to do that". A better API would be "ask the kernel
'do we need this?' and enable it if it says 'yes'". The kernel
knows what its implementations of ITS and GICv3 (and perhaps
future in-kernel memory-using devices) require, after all.

thanks
-- PMM
Gavin Shan Feb. 23, 2023, 12:52 a.m. UTC | #4
On 2/23/23 2:54 AM, Peter Maydell wrote:
> On Wed, 22 Feb 2023 at 04:36, Gavin Shan <gshan@redhat.com> wrote:
>>
>> On 2/22/23 3:27 AM, Peter Maydell wrote:
>>> Why does this need to be board-specific code? Is there
>>> some way we can just do the right thing automatically?
>>> Why does the GIC/ITS matter?
>>>
>>> The kernel should already know whether we have asked it
>>> to do something that needs this extra extension, so
>>> I think we ought to be able in the generic "enable the
>>> dirty ring" code say "if the kernel says we need this
>>> extra thing, also enable this extra thing". Or if that's
>>> too early, we can do the extra part in a generic hook a
>>> bit later.
>>>
>>> In the future there might be other things, presumably,
>>> that need the backup bitmap, so it would be more future
>>> proof not to need to also change QEMU to add extra
>>> logic checks that duplicate the logic the kernel already has.
>>>
>>
>> When the dirty ring is enabled, a per-vcpu buffer is used to track the dirty pages.
>> The prerequisite to use the per-vcpu buffer is existing running VCPU context. There
>> are two cases where no running VCPU context exists and the backup bitmap extension
>> is needed, as we know until now: (a) save/restore GICv3 tables; (b) save/restore ITS
>> tables; These two cases are related to KVM device "kvm-arm-gicv3" and "arm-its-kvm",
>> which are only needed by virt machine at present. So we needn't the backup bitmap
>> extension for other boards.
> 
> But we might have to for other boards we add later. We shouldn't
> put code in per-board if it's not really board specific.
> 
> Moreover, I think "we need the backup bitmap if the kernel is
> using its GICv3 or ITS implementation" is a kernel implementation
> detail. It seems to me that it would be cleaner if QEMU didn't
> have to hardcode "we happen to know that these are the situations
> when we need to do that". A better API would be "ask the kernel
> 'do we need this?' and enable it if it says 'yes'". The kernel
> knows what its implementations of ITS and GICv3 (and perhaps
> future in-kernel memory-using devices) require, after all.
> 

Well, As we know so far, the backup bitmap extension is only required by 'kvm-arm-gicv3'
and 'arm-its-kvm' device. Those two devices are only used by virt machine at present.
So it's a board specific requirement. I'm not sure about the future. We may need to
enable the extension for other devices and other boards. That time, the requirement
isn't board specific any more. However, we're uncertain for the future.

In order to cover the future requirement, the extension is needed by other boards,
the best way I can figure out is to enable the extension in generic path in kvm_init()
if the extension is supported by the host kernel. In this way, the unnecessary overhead
is introduced for those boards where 'kvm-arm-vgic3' and 'arm-its-kvm' aren't used.
The overhead should be very small and acceptable. Note that the host kernel don't know
if 'kvm-arm-vgic3' or 'arm-its-kvm' device is needed by the board in kvm_init(), which
is the generic path.

The 'kvm-arm-vgic3' and 'arm-its-kvm' devices are created in machvirt_init(), where
the memory slots are also added. Prior to the function, host kernel doesn't know if
the extension is needed by QEMU. It means we have to enable the extension in machvirt_init(),
which is exactly what we're doing. The difference is QEMU decides to enable the extension
instead of being told to enable it by host kernel. Host kernel doesn't have the answer to
"Hey host kernel, do we need to enable the extension" until machvirt_init() where the devices
are created. Besides, machvirt_init() isn't the generic path if we want to enable the extension
for all possible boards. Further more, the extension can't be enabled if memory slots have been
added.

In summary, the best way I can figure out is to enable the extension in kvm_init() if it
has been supported by host kernel, to cover all possible boards for future cases. Otherwise,
we keep what we're doing to enable the extension in machvirt_init(). Please let me know your
thoughts, Peter :)

Thanks,
Gavin
Peter Maydell Feb. 23, 2023, 11:51 a.m. UTC | #5
On Thu, 23 Feb 2023 at 00:52, Gavin Shan <gshan@redhat.com> wrote:
>
> On 2/23/23 2:54 AM, Peter Maydell wrote:
> > But we might have to for other boards we add later. We shouldn't
> > put code in per-board if it's not really board specific.
> >
> > Moreover, I think "we need the backup bitmap if the kernel is
> > using its GICv3 or ITS implementation" is a kernel implementation
> > detail. It seems to me that it would be cleaner if QEMU didn't
> > have to hardcode "we happen to know that these are the situations
> > when we need to do that". A better API would be "ask the kernel
> > 'do we need this?' and enable it if it says 'yes'". The kernel
> > knows what its implementations of ITS and GICv3 (and perhaps
> > future in-kernel memory-using devices) require, after all.
> >
>
> Well, As we know so far, the backup bitmap extension is only required by 'kvm-arm-gicv3'
> and 'arm-its-kvm' device. Those two devices are only used by virt machine at present.
> So it's a board specific requirement. I'm not sure about the future. We may need to
> enable the extension for other devices and other boards. That time, the requirement
> isn't board specific any more. However, we're uncertain for the future.

Most boards using KVM are likely to want a GICv3, and
probably an ITS too. A board with no interrupt controller
is useless, and the GICv2 is obsolete.

> In order to cover the future requirement, the extension is needed by other boards,
> the best way I can figure out is to enable the extension in generic path in kvm_init()
> if the extension is supported by the host kernel. In this way, the unnecessary overhead
> is introduced for those boards where 'kvm-arm-vgic3' and 'arm-its-kvm' aren't used.
> The overhead should be very small and acceptable. Note that the host kernel don't know
> if 'kvm-arm-vgic3' or 'arm-its-kvm' device is needed by the board in kvm_init(), which
> is the generic path.

We can have a generic hook that happens after board init is
done, if we want to do non-board-specific stuff that happens
later. However I suspect that anybody who cares about migration
performance is likely using a GICv3 at least anyway,
so "enable always" should be fine.

thanks
-- PMM
Gavin Shan Feb. 24, 2023, 10:19 a.m. UTC | #6
On 2/23/23 10:51 PM, Peter Maydell wrote:
> On Thu, 23 Feb 2023 at 00:52, Gavin Shan <gshan@redhat.com> wrote:
>>
>> On 2/23/23 2:54 AM, Peter Maydell wrote:
>>> But we might have to for other boards we add later. We shouldn't
>>> put code in per-board if it's not really board specific.
>>>
>>> Moreover, I think "we need the backup bitmap if the kernel is
>>> using its GICv3 or ITS implementation" is a kernel implementation
>>> detail. It seems to me that it would be cleaner if QEMU didn't
>>> have to hardcode "we happen to know that these are the situations
>>> when we need to do that". A better API would be "ask the kernel
>>> 'do we need this?' and enable it if it says 'yes'". The kernel
>>> knows what its implementations of ITS and GICv3 (and perhaps
>>> future in-kernel memory-using devices) require, after all.
>>>
>>
>> Well, As we know so far, the backup bitmap extension is only required by 'kvm-arm-gicv3'
>> and 'arm-its-kvm' device. Those two devices are only used by virt machine at present.
>> So it's a board specific requirement. I'm not sure about the future. We may need to
>> enable the extension for other devices and other boards. That time, the requirement
>> isn't board specific any more. However, we're uncertain for the future.
> 
> Most boards using KVM are likely to want a GICv3, and
> probably an ITS too. A board with no interrupt controller
> is useless, and the GICv2 is obsolete.
> 

Yes, exactly.

>> In order to cover the future requirement, the extension is needed by other boards,
>> the best way I can figure out is to enable the extension in generic path in kvm_init()
>> if the extension is supported by the host kernel. In this way, the unnecessary overhead
>> is introduced for those boards where 'kvm-arm-vgic3' and 'arm-its-kvm' aren't used.
>> The overhead should be very small and acceptable. Note that the host kernel don't know
>> if 'kvm-arm-vgic3' or 'arm-its-kvm' device is needed by the board in kvm_init(), which
>> is the generic path.
> 
> We can have a generic hook that happens after board init is
> done, if we want to do non-board-specific stuff that happens
> later. However I suspect that anybody who cares about migration
> performance is likely using a GICv3 at least anyway,
> so "enable always" should be fine.
> 

Ok, Thanks, Peter. I will enable it always in next revision. The overhead
is very small and acceptable.

Thanks,
Gavin
diff mbox series

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 75f28947de..ea9bd98a65 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2024,6 +2024,8 @@  static void machvirt_init(MachineState *machine)
     VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine);
     MachineClass *mc = MACHINE_GET_CLASS(machine);
     const CPUArchIdList *possible_cpus;
+    const char *gictype = NULL;
+    const char *itsclass = NULL;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *secure_sysmem = NULL;
     MemoryRegion *tag_sysmem = NULL;
@@ -2071,6 +2073,30 @@  static void machvirt_init(MachineState *machine)
      */
     finalize_gic_version(vms);
 
+    /*
+     * When "kvm-arm-gicv3" or "arm-its-kvm" is used, the backup dirty
+     * bitmap has to be enabled for KVM dirty ring, before any memory
+     * slot is added. Otherwise, the migration will fail with the dirty
+     * ring.
+     */
+    if (kvm_enabled()) {
+        if (vms->gic_version != VIRT_GIC_VERSION_2) {
+            gictype = gicv3_class_name();
+        }
+
+        if (vms->gic_version != VIRT_GIC_VERSION_2 && vms->its) {
+            itsclass = its_class_name();
+        }
+
+        if (((gictype && !strcmp(gictype, "kvm-arm-gicv3")) ||
+             (itsclass && !strcmp(itsclass, "arm-its-kvm"))) &&
+            !kvm_arm_enable_dirty_ring_with_bitmap()) {
+            error_report("Failed to enable the backup bitmap for "
+                         "KVM dirty ring");
+            exit(1);
+        }
+    }
+
     if (vms->secure) {
         /*
          * The Secure view of the world is the same as the NonSecure,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 1197253d12..91960e1cd3 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -764,6 +764,31 @@  bool kvm_arm_steal_time_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
 }
 
+bool kvm_arm_enable_dirty_ring_with_bitmap(void)
+{
+    int ret;
+
+    /* No need to enable the backup bitmap if dirty ring isn't enabled */
+    if (!kvm_dirty_ring_enabled()) {
+        return true;
+    }
+
+    ret = kvm_check_extension(kvm_state, KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP);
+    if (!ret) {
+        return false;
+    }
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP,
+                            0, 1);
+    if (ret) {
+        return false;
+    }
+
+    kvm_state->kvm_dirty_ring_with_bitmap = true;
+
+    return true;
+}
+
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 
 uint32_t kvm_arm_sve_get_vls(CPUState *cs)
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 99017b635c..402281e61a 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -282,6 +282,13 @@  void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp);
  */
 bool kvm_arm_steal_time_supported(void);
 
+/**
+ * kvm_arm_enable_dirty_ring_with_bitmap:
+ * Returns: true if KVM dirty ring's backup bitmap is enabled
+ * and false otherwise.
+ */
+bool kvm_arm_enable_dirty_ring_with_bitmap(void);
+
 /**
  * kvm_arm_aarch32_supported:
  *
@@ -395,6 +402,11 @@  static inline bool kvm_arm_steal_time_supported(void)
     return false;
 }
 
+static inline bool kvm_arm_enable_dirty_ring_with_bitmap(void)
+{
+    return false;
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */