diff mbox

[v4,03/21] target/arm: Reorganize PMCCNTR accesses

Message ID 1523997485-1905-4-git-send-email-alindsay@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Aaron Lindsay April 17, 2018, 8:37 p.m. UTC
pmccntr_read and pmccntr_write contained duplicate code that was already
being handled by pmccntr_sync. Consolidate the duplicated code into two
functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
c15_ccnt in CPUARMState so that we can simultaneously save both the
architectural register value and the last underlying cycle count - this
ensure time isn't lost and will also allow us to access the 'old'
architectural register value in order to detect overflows in later
patches.

Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
---
 target/arm/cpu.h    |  28 ++++++++++-----
 target/arm/helper.c | 100 ++++++++++++++++++++++++++++------------------------
 2 files changed, 73 insertions(+), 55 deletions(-)

Comments

Peter Maydell April 20, 2018, 10:17 a.m. UTC | #1
On 17 April 2018 at 21:37, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> pmccntr_read and pmccntr_write contained duplicate code that was already
> being handled by pmccntr_sync. Consolidate the duplicated code into two
> functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
> c15_ccnt in CPUARMState so that we can simultaneously save both the
> architectural register value and the last underlying cycle count - this
> ensure time isn't lost and will also allow us to access the 'old'
> architectural register value in order to detect overflows in later
> patches.
>
> Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
> ---
>  target/arm/cpu.h    |  28 ++++++++++-----
>  target/arm/helper.c | 100 ++++++++++++++++++++++++++++------------------------
>  2 files changed, 73 insertions(+), 55 deletions(-)
>
> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index 19a0c03..04041db 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -454,10 +454,20 @@ typedef struct CPUARMState {
>          uint64_t oslsr_el1; /* OS Lock Status */
>          uint64_t mdcr_el2;
>          uint64_t mdcr_el3;
> -        /* If the counter is enabled, this stores the last time the counter
> -         * was reset. Otherwise it stores the counter value
> +        /* Stores the architectural value of the counter *the last time it was
> +         * updated* by pmccntr_op_start. Accesses should always be surrounded
> +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
> +         * architecturally-corect value is being read/set.
>           */
>          uint64_t c15_ccnt;
> +        /* Stores the delta between the architectural value and the underlying
> +         * cycle count during normal operation. It is used to update c15_ccnt
> +         * to be the correct architectural value before accesses. During
> +         * accesses, c15_ccnt_delta contains the underlying count being used
> +         * for the access, after which it reverts to the delta value in
> +         * pmccntr_op_finish.
> +         */
> +        uint64_t c15_ccnt_delta;

So the key question here is: how does this work for VM migration?

thanks
-- PMM
Peter Maydell April 20, 2018, 10:41 a.m. UTC | #2
On 17 April 2018 at 21:37, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> pmccntr_read and pmccntr_write contained duplicate code that was already
> being handled by pmccntr_sync. Consolidate the duplicated code into two
> functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
> c15_ccnt in CPUARMState so that we can simultaneously save both the
> architectural register value and the last underlying cycle count - this
> ensure time isn't lost and will also allow us to access the 'old'
> architectural register value in order to detect overflows in later
> patches.

Can we handle the overflow case by setting up a timer that will
expire when the counter would overflow? We'll want that anyway
to be able to signal an interrupt on overflows. That would avoid
the need to have an extra field in the CPU state structure, I think.
Since PMCCNTR_EL0 is 64 bits it's not going to overflow very often
so the slight overhead of a timer firing is not going to be a
problem...

thanks
-- PMM
Aaron Lindsay June 22, 2018, 1:50 p.m. UTC | #3
On Apr 20 11:17, Peter Maydell wrote:
> On 17 April 2018 at 21:37, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> > pmccntr_read and pmccntr_write contained duplicate code that was already
> > being handled by pmccntr_sync. Consolidate the duplicated code into two
> > functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
> > c15_ccnt in CPUARMState so that we can simultaneously save both the
> > architectural register value and the last underlying cycle count - this
> > ensure time isn't lost and will also allow us to access the 'old'
> > architectural register value in order to detect overflows in later
> > patches.
> >
> > Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
> > ---
> >  target/arm/cpu.h    |  28 ++++++++++-----
> >  target/arm/helper.c | 100 ++++++++++++++++++++++++++++------------------------
> >  2 files changed, 73 insertions(+), 55 deletions(-)
> >
> > diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> > index 19a0c03..04041db 100644
> > --- a/target/arm/cpu.h
> > +++ b/target/arm/cpu.h
> > @@ -454,10 +454,20 @@ typedef struct CPUARMState {
> >          uint64_t oslsr_el1; /* OS Lock Status */
> >          uint64_t mdcr_el2;
> >          uint64_t mdcr_el3;
> > -        /* If the counter is enabled, this stores the last time the counter
> > -         * was reset. Otherwise it stores the counter value
> > +        /* Stores the architectural value of the counter *the last time it was
> > +         * updated* by pmccntr_op_start. Accesses should always be surrounded
> > +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
> > +         * architecturally-corect value is being read/set.
> >           */
> >          uint64_t c15_ccnt;
> > +        /* Stores the delta between the architectural value and the underlying
> > +         * cycle count during normal operation. It is used to update c15_ccnt
> > +         * to be the correct architectural value before accesses. During
> > +         * accesses, c15_ccnt_delta contains the underlying count being used
> > +         * for the access, after which it reverts to the delta value in
> > +         * pmccntr_op_finish.
> > +         */
> > +        uint64_t c15_ccnt_delta;
> 
> So the key question here is: how does this work for VM migration?

To be honest, I'm not sure I fully understand the things I need to be
looking out for with VM migration.

My guess, though, is that this current implementation is not sufficient.
Perhaps there needs to be logic to ensure that c15_ccnt is the current
architectural value before migration and also to setup c15_ccnt_delta to
be the delta between that architectural value and the underlying cycle
count upon inbound migration. Does that sound like an approach which
would fit well within the rest of the migration framework?

-Aaron
Peter Maydell June 22, 2018, 2:08 p.m. UTC | #4
On 22 June 2018 at 14:50, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> On Apr 20 11:17, Peter Maydell wrote:
>> On 17 April 2018 at 21:37, Aaron Lindsay <alindsay@codeaurora.org> wrote:
>> > pmccntr_read and pmccntr_write contained duplicate code that was already
>> > being handled by pmccntr_sync. Consolidate the duplicated code into two
>> > functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
>> > c15_ccnt in CPUARMState so that we can simultaneously save both the
>> > architectural register value and the last underlying cycle count - this
>> > ensure time isn't lost and will also allow us to access the 'old'
>> > architectural register value in order to detect overflows in later
>> > patches.
>> >
>> > Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>

>> > -        /* If the counter is enabled, this stores the last time the counter
>> > -         * was reset. Otherwise it stores the counter value
>> > +        /* Stores the architectural value of the counter *the last time it was
>> > +         * updated* by pmccntr_op_start. Accesses should always be surrounded
>> > +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
>> > +         * architecturally-corect value is being read/set.
>> >           */
>> >          uint64_t c15_ccnt;
>> > +        /* Stores the delta between the architectural value and the underlying
>> > +         * cycle count during normal operation. It is used to update c15_ccnt
>> > +         * to be the correct architectural value before accesses. During
>> > +         * accesses, c15_ccnt_delta contains the underlying count being used
>> > +         * for the access, after which it reverts to the delta value in
>> > +         * pmccntr_op_finish.
>> > +         */
>> > +        uint64_t c15_ccnt_delta;
>>
>> So the key question here is: how does this work for VM migration?
>
> To be honest, I'm not sure I fully understand the things I need to be
> looking out for with VM migration.
>
> My guess, though, is that this current implementation is not sufficient.
> Perhaps there needs to be logic to ensure that c15_ccnt is the current
> architectural value before migration and also to setup c15_ccnt_delta to
> be the delta between that architectural value and the underlying cycle
> count upon inbound migration. Does that sound like an approach which
> would fit well within the rest of the migration framework?

You need to deal with two different situations:
 (1) migration from an older QEMU which doesn't have this patchset
 (2) migration from a QEMU with this patchset to one with this patchset

Either:
 (a) all the architectural state can be expressed in our existing
state fields in whatever the previous format was -- in this case
you just need to ensure that cpu_pre_save() and cpu_post_load()
put the state there and unpack it again
 (b) we were missing some architectural state and really do need
to transfer more over the wire than we were before -- in this case
you need to add a new subsection to the vmstate which has the fields
that contain that new state, and give the subsection a suitable 'needed'
function to indicate when the subsection should be transferred plus
pre_load and post_load functions that allow us to cope correctly with
the case of the older QEMU that doesn't send the subsection.

thanks
-- PMM
Aaron Lindsay June 22, 2018, 8:36 p.m. UTC | #5
On Jun 22 15:08, Peter Maydell wrote:
> On 22 June 2018 at 14:50, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> > On Apr 20 11:17, Peter Maydell wrote:
> >> On 17 April 2018 at 21:37, Aaron Lindsay <alindsay@codeaurora.org> wrote:
> >> > pmccntr_read and pmccntr_write contained duplicate code that was already
> >> > being handled by pmccntr_sync. Consolidate the duplicated code into two
> >> > functions: pmccntr_op_start and pmccntr_op_finish. Add a companion to
> >> > c15_ccnt in CPUARMState so that we can simultaneously save both the
> >> > architectural register value and the last underlying cycle count - this
> >> > ensure time isn't lost and will also allow us to access the 'old'
> >> > architectural register value in order to detect overflows in later
> >> > patches.
> >> >
> >> > Signed-off-by: Aaron Lindsay <alindsay@codeaurora.org>
> 
> >> > -        /* If the counter is enabled, this stores the last time the counter
> >> > -         * was reset. Otherwise it stores the counter value
> >> > +        /* Stores the architectural value of the counter *the last time it was
> >> > +         * updated* by pmccntr_op_start. Accesses should always be surrounded
> >> > +         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
> >> > +         * architecturally-corect value is being read/set.
> >> >           */
> >> >          uint64_t c15_ccnt;
> >> > +        /* Stores the delta between the architectural value and the underlying
> >> > +         * cycle count during normal operation. It is used to update c15_ccnt
> >> > +         * to be the correct architectural value before accesses. During
> >> > +         * accesses, c15_ccnt_delta contains the underlying count being used
> >> > +         * for the access, after which it reverts to the delta value in
> >> > +         * pmccntr_op_finish.
> >> > +         */
> >> > +        uint64_t c15_ccnt_delta;
> >>
> >> So the key question here is: how does this work for VM migration?
> >
> > To be honest, I'm not sure I fully understand the things I need to be
> > looking out for with VM migration.
> >
> > My guess, though, is that this current implementation is not sufficient.
> > Perhaps there needs to be logic to ensure that c15_ccnt is the current
> > architectural value before migration and also to setup c15_ccnt_delta to
> > be the delta between that architectural value and the underlying cycle
> > count upon inbound migration. Does that sound like an approach which
> > would fit well within the rest of the migration framework?
> 
> You need to deal with two different situations:
>  (1) migration from an older QEMU which doesn't have this patchset
>  (2) migration from a QEMU with this patchset to one with this patchset
> 
> Either:
>  (a) all the architectural state can be expressed in our existing
> state fields in whatever the previous format was -- in this case
> you just need to ensure that cpu_pre_save() and cpu_post_load()
> put the state there and unpack it again
>  (b) we were missing some architectural state and really do need
> to transfer more over the wire than we were before -- in this case
> you need to add a new subsection to the vmstate which has the fields
> that contain that new state, and give the subsection a suitable 'needed'
> function to indicate when the subsection should be transferred plus
> pre_load and post_load functions that allow us to cope correctly with
> the case of the older QEMU that doesn't send the subsection.

Okay, thanks! I didn't manage to get to this before v5, but look into it
more for v6.

-Aaron
diff mbox

Patch

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 19a0c03..04041db 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -454,10 +454,20 @@  typedef struct CPUARMState {
         uint64_t oslsr_el1; /* OS Lock Status */
         uint64_t mdcr_el2;
         uint64_t mdcr_el3;
-        /* If the counter is enabled, this stores the last time the counter
-         * was reset. Otherwise it stores the counter value
+        /* Stores the architectural value of the counter *the last time it was
+         * updated* by pmccntr_op_start. Accesses should always be surrounded
+         * by pmccntr_op_start/pmccntr_op_finish to guarantee the latest
+         * architecturally-corect value is being read/set.
          */
         uint64_t c15_ccnt;
+        /* Stores the delta between the architectural value and the underlying
+         * cycle count during normal operation. It is used to update c15_ccnt
+         * to be the correct architectural value before accesses. During
+         * accesses, c15_ccnt_delta contains the underlying count being used
+         * for the access, after which it reverts to the delta value in
+         * pmccntr_op_finish.
+         */
+        uint64_t c15_ccnt_delta;
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
         uint64_t vpidr_el2; /* Virtualization Processor ID Register */
         uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
@@ -890,15 +900,17 @@  int cpu_arm_signal_handler(int host_signum, void *pinfo,
                            void *puc);
 
 /**
- * pmccntr_sync
+ * pmccntr_op_start/finish
  * @env: CPUARMState
  *
- * Synchronises the counter in the PMCCNTR. This must always be called twice,
- * once before any action that might affect the timer and again afterwards.
- * The function is used to swap the state of the register if required.
- * This only happens when not in user mode (!CONFIG_USER_ONLY)
+ * Convert the counter in the PMCCNTR between its delta form (the typical mode
+ * when it's enabled) and the guest-visible value. These two calls must always
+ * surround any action which might affect the counter, and the return value
+ * from pmccntr_op_start must be supplied as the second argument to
+ * pmccntr_op_finish.
  */
-void pmccntr_sync(CPUARMState *env);
+void pmccntr_op_start(CPUARMState *env);
+void pmccntr_op_finish(CPUARMState *env);
 
 /* SCTLR bit meanings. Several bits have been reused in newer
  * versions of the architecture; in that case we define constants
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 83ea8f4..f6269a2 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1000,28 +1000,53 @@  static inline bool arm_ccnt_enabled(CPUARMState *env)
 
     return true;
 }
-
-void pmccntr_sync(CPUARMState *env)
+/*
+ * Ensure c15_ccnt is the guest-visible count so that operations such as
+ * enabling/disabling the counter or filtering, modifying the count itself,
+ * etc. can be done logically. This is essentially a no-op if the counter is
+ * not enabled at the time of the call.
+ */
+void pmccntr_op_start(CPUARMState *env)
 {
-    uint64_t temp_ticks;
-
-    temp_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+    uint64_t cycles = 0;
+    cycles = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
 
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        temp_ticks /= 64;
-    }
-
     if (arm_ccnt_enabled(env)) {
-        env->cp15.c15_ccnt = temp_ticks - env->cp15.c15_ccnt;
+        uint64_t eff_cycles = cycles;
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            eff_cycles /= 64;
+        }
+
+        env->cp15.c15_ccnt = eff_cycles - env->cp15.c15_ccnt_delta;
+    }
+    env->cp15.c15_ccnt_delta = cycles;
+}
+
+/*
+ * If PMCCNTR is enabled, recalculate the delta between the clock and the
+ * guest-visible count. A call to pmccntr_op_finish should follow every call to
+ * pmccntr_op_start.
+ */
+void pmccntr_op_finish(CPUARMState *env)
+{
+    if (arm_ccnt_enabled(env)) {
+        uint64_t prev_cycles = env->cp15.c15_ccnt_delta;
+
+        if (env->cp15.c9_pmcr & PMCRD) {
+            /* Increment once every 64 processor clock cycles */
+            prev_cycles /= 64;
+        }
+
+        env->cp15.c15_ccnt_delta = prev_cycles - env->cp15.c15_ccnt;
     }
 }
 
 static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
 {
-    pmccntr_sync(env);
+    pmccntr_op_start(env);
 
     if (value & PMCRC) {
         /* The counter has been reset */
@@ -1032,26 +1057,16 @@  static void pmcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
     env->cp15.c9_pmcr &= ~0x39;
     env->cp15.c9_pmcr |= (value & 0x39);
 
-    pmccntr_sync(env);
+    pmccntr_op_finish(env);
 }
 
 static uint64_t pmccntr_read(CPUARMState *env, const ARMCPRegInfo *ri)
 {
-    uint64_t total_ticks;
-
-    if (!arm_ccnt_enabled(env)) {
-        /* Counter is disabled, do not change value */
-        return env->cp15.c15_ccnt;
-    }
-
-    total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
-
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        total_ticks /= 64;
-    }
-    return total_ticks - env->cp15.c15_ccnt;
+    uint64_t ret;
+    pmccntr_op_start(env);
+    ret = env->cp15.c15_ccnt;
+    pmccntr_op_finish(env);
+    return ret;
 }
 
 static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1068,22 +1083,9 @@  static void pmselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
 static void pmccntr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                         uint64_t value)
 {
-    uint64_t total_ticks;
-
-    if (!arm_ccnt_enabled(env)) {
-        /* Counter is disabled, set the absolute value */
-        env->cp15.c15_ccnt = value;
-        return;
-    }
-
-    total_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
-                           ARM_CPU_FREQ, NANOSECONDS_PER_SECOND);
-
-    if (env->cp15.c9_pmcr & PMCRD) {
-        /* Increment once every 64 processor clock cycles */
-        total_ticks /= 64;
-    }
-    env->cp15.c15_ccnt = total_ticks - value;
+    pmccntr_op_start(env);
+    env->cp15.c15_ccnt = value;
+    pmccntr_op_finish(env);
 }
 
 static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1096,7 +1098,11 @@  static void pmccntr_write32(CPUARMState *env, const ARMCPRegInfo *ri,
 
 #else /* CONFIG_USER_ONLY */
 
-void pmccntr_sync(CPUARMState *env)
+void pmccntr_op_start(CPUARMState *env)
+{
+}
+
+void pmccntr_op_finish(CPUARMState *env)
 {
 }
 
@@ -1105,9 +1111,9 @@  void pmccntr_sync(CPUARMState *env)
 static void pmccfiltr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
 {
-    pmccntr_sync(env);
+    pmccntr_op_start(env);
     env->cp15.pmccfiltr_el0 = value & 0x7E000000;
-    pmccntr_sync(env);
+    pmccntr_op_finish(env);
 }
 
 static void pmcntenset_write(CPUARMState *env, const ARMCPRegInfo *ri,