diff mbox series

[12/19] target/ppc/pmu_book3s_helper.c: enable PMC1 counter negative EBB

Message ID 20210809131057.1694145-13-danielhb413@gmail.com (mailing list archive)
State New, archived
Headers show
Series PMU-EBB support for PPC64 TCG | expand

Commit Message

Daniel Henrique Barboza Aug. 9, 2021, 1:10 p.m. UTC
This patch starts the counter negative EBB support by enabling PMC1
counter negative condition.

A counter negative condition happens when a performance monitor counter
reaches the value 0x80000000. When that happens, if a counter negative
condition is enabled in that counter, a performance monitor alert is
triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.

An icount-based logic is used to predict when we need to wake up the timer
to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
The timer callback will then trigger a PPC_INTERRUPT_PMC which will become an
event-based exception later.

Some EBB powerpc kernel selftests are passing after this patch, but a
substantial amount of them rely on other PMCs to be enabled and events
that we don't support at this moment. We'll address that in the next
patches.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/cpu.h               |   1 +
 target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
 2 files changed, 92 insertions(+), 36 deletions(-)

Comments

David Gibson Aug. 10, 2021, 4:01 a.m. UTC | #1
On Mon, Aug 09, 2021 at 10:10:50AM -0300, Daniel Henrique Barboza wrote:
> This patch starts the counter negative EBB support by enabling PMC1
> counter negative condition.
> 
> A counter negative condition happens when a performance monitor counter
> reaches the value 0x80000000. When that happens, if a counter negative
> condition is enabled in that counter, a performance monitor alert is
> triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.
> 
> An icount-based logic is used to predict when we need to wake up the timer
> to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
> The timer callback will then trigger a PPC_INTERRUPT_PMC which will become a
> event-based exception later.
> 
> Some EBB powerpc kernel selftests are passing after this patch, but a
> substancial amount of them relies on other PMCs to be enabled and events
> that we don't support at this moment. We'll address that in the next
> patches.
> 
> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> ---
>  target/ppc/cpu.h               |   1 +
>  target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
>  2 files changed, 92 insertions(+), 36 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 1d38b8cf7a..5c81d459f4 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -350,6 +350,7 @@ typedef struct ppc_v3_pate_t {
>  #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
>  #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
>  #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
> +#define MMCR0_PMC1CE PPC_BIT(48)
>  
>  #define MMCR1_PMC1SEL_SHIFT (63 - 39)
>  #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
> diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
> index 43cc0eb722..58ae65e22b 100644
> --- a/target/ppc/pmu_book3s_helper.c
> +++ b/target/ppc/pmu_book3s_helper.c
> @@ -25,6 +25,7 @@
>   * and SPAPR code.
>   */
>  #define PPC_CPU_FREQ 1000000000
> +#define COUNTER_NEGATIVE_VAL 0x80000000
>  
>  static uint64_t get_cycles(uint64_t icount_delta)
>  {
> @@ -32,22 +33,9 @@ static uint64_t get_cycles(uint64_t icount_delta)
>                      NANOSECONDS_PER_SECOND);
>  }
>  
> -static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> -                                    uint64_t icount_delta)
> -{
> -    env->spr[sprn] += icount_delta;
> -}
> -
> -static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> -                              uint64_t icount_delta)
> -{
> -    env->spr[sprn] += get_cycles(icount_delta);
> -}
> -
> -static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> -                                        uint64_t icount_delta)
> +static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
>  {
> -    int event;
> +    int event = 0x0;
>  
>      switch (sprn) {
>      case SPR_POWER_PMC1:
> @@ -65,11 +53,35 @@ static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>      case SPR_POWER_PMC4:
>          event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
>          break;
> +    case SPR_POWER_PMC5:
> +        event = 0x2;
> +        break;
> +    case SPR_POWER_PMC6:
> +        event = 0x1E;
> +        break;

This looks like a nice cleanup that would be better folded into an
earlier patch.

>      default:
> -        return;
> +        break;
>      }
>  
> -    switch (event) {
> +    return event;
> +}
> +
> +static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> +                                    uint64_t icount_delta)
> +{
> +    env->spr[sprn] += icount_delta;
> +}
> +
> +static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> +                              uint64_t icount_delta)
> +{
> +    env->spr[sprn] += get_cycles(icount_delta);
> +}
> +
> +static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> +                                        uint64_t icount_delta)
> +{
> +    switch (get_PMC_event(env, sprn)) {
>      case 0x2:
>          update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
>          break;
> @@ -99,30 +111,57 @@ static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
>      update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
>  }
>  
> +static void set_PMU_excp_timer(CPUPPCState *env)
> +{
> +    uint64_t timeout, now, remaining_val;
> +
> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
> +        return;
> +    }
> +
> +    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
> +
> +    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
> +    case 0x2:
> +        timeout = icount_to_ns(remaining_val);
> +        break;
> +    case 0x1e:
> +        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
> +                           PPC_CPU_FREQ);

So.. this appears to be simulating to the guest that cycles are
occurring at a constant rate, consistent with the advertised CPU
frequency.  Which sounds right, except... it's not clear to me that
you're using the same logic to generate the values you read from the
cycles PMC (in that case it shouldn't need to reference icount at all,
right?).

> +        break;
> +    default:
> +        return;
> +    }
> +
> +    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> +
> +    timer_mod(env->pmu_intr_timer, now + timeout);
> +}
> +
>  static void cpu_ppc_pmu_timer_cb(void *opaque)
>  {
>      PowerPCCPU *cpu = opaque;
>      CPUPPCState *env = &cpu->env;
> -    uint64_t mmcr0;
> -
> -    mmcr0 = env->spr[SPR_POWER_MMCR0];
> -    if (env->spr[SPR_POWER_MMCR0] & MMCR0_EBE) {
> -        /* freeeze counters if needed */
> -        if (mmcr0 & MMCR0_FCECE) {
> -            mmcr0 &= ~MMCR0_FCECE;
> -            mmcr0 |= MMCR0_FC;
> -        }
> +    uint64_t icount_delta = (uint64_t)icount_get_raw() - env->pmu_base_icount;
>  
> -        /* Clear PMAE and set PMAO */
> -        if (mmcr0 & MMCR0_PMAE) {
> -            mmcr0 &= ~MMCR0_PMAE;
> -            mmcr0 |= MMCR0_PMAO;
> -        }
> -        env->spr[SPR_POWER_MMCR0] = mmcr0;
> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_EBE)) {
> +        return;
> +    }
> +
> +    update_PMCs(env, icount_delta);
> +
> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCECE) {
> +        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_FCECE;
> +        env->spr[SPR_POWER_MMCR0] |= MMCR0_FC;
> +    }
>  
> -        /* Fire the PMC hardware exception */
> -        ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE) {
> +        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_PMAE;
> +        env->spr[SPR_POWER_MMCR0] |= MMCR0_PMAO;
>      }
> +
> +    /* Fire the PMC hardware exception */
> +    ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
>  }
>  
>  void cpu_ppc_pmu_timer_init(CPUPPCState *env)
> @@ -134,12 +173,19 @@ void cpu_ppc_pmu_timer_init(CPUPPCState *env)
>      env->pmu_intr_timer = timer;
>  }
>  
> +static bool mmcr0_counter_neg_cond_enabled(uint64_t mmcr0)
> +{
> +    return mmcr0 & MMCR0_PMC1CE;
> +}
> +
>  void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>  {
>      uint64_t curr_icount = (uint64_t)icount_get_raw();
>      bool curr_FC = env->spr[SPR_POWER_MMCR0] & MMCR0_FC;
>      bool new_FC = value & MMCR0_FC;
>  
> +    env->spr[SPR_POWER_MMCR0] = value;
> +
>      /*
>       * In an frozen count (FC) bit change:
>       *
> @@ -163,10 +209,19 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>               * until the freeze.
>               */
>              update_PMCs(env, icount_delta);
> +
> +            /* delete pending timer */
> +            timer_del(env->pmu_intr_timer);
>          } else {
>              env->pmu_base_icount = curr_icount;
> +
> +            /*
> +             * Start performance monitor alert timer for counter negative
> +             * events, if needed.
> +             */
> +            if (mmcr0_counter_neg_cond_enabled(env->spr[SPR_POWER_MMCR0])) {
> +                set_PMU_excp_timer(env);
> +            }
>          }
>      }
> -
> -    env->spr[SPR_POWER_MMCR0] = value;
>  }
Daniel Henrique Barboza Aug. 10, 2021, 8:26 p.m. UTC | #2
On 8/10/21 1:01 AM, David Gibson wrote:
> On Mon, Aug 09, 2021 at 10:10:50AM -0300, Daniel Henrique Barboza wrote:
>> This patch starts the counter negative EBB support by enabling PMC1
>> counter negative condition.
>>
>> A counter negative condition happens when a performance monitor counter
>> reaches the value 0x80000000. When that happens, if a counter negative
>> condition is enabled in that counter, a performance monitor alert is
>> triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.
>>
>> An icount-based logic is used to predict when we need to wake up the timer
>> to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
>> The timer callback will then trigger a PPC_INTERRUPT_PMC which will become a
>> event-based exception later.
>>
>> Some EBB powerpc kernel selftests are passing after this patch, but a
>> substancial amount of them relies on other PMCs to be enabled and events
>> that we don't support at this moment. We'll address that in the next
>> patches.
>>
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> ---
>>   target/ppc/cpu.h               |   1 +
>>   target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
>>   2 files changed, 92 insertions(+), 36 deletions(-)
>>
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 1d38b8cf7a..5c81d459f4 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -350,6 +350,7 @@ typedef struct ppc_v3_pate_t {
>>   #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
>>   #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
>>   #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
>> +#define MMCR0_PMC1CE PPC_BIT(48)
>>   
>>   #define MMCR1_PMC1SEL_SHIFT (63 - 39)
>>   #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
>> diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
>> index 43cc0eb722..58ae65e22b 100644
>> --- a/target/ppc/pmu_book3s_helper.c
>> +++ b/target/ppc/pmu_book3s_helper.c
>> @@ -25,6 +25,7 @@
>>    * and SPAPR code.
>>    */
>>   #define PPC_CPU_FREQ 1000000000
>> +#define COUNTER_NEGATIVE_VAL 0x80000000
>>   
>>   static uint64_t get_cycles(uint64_t icount_delta)
>>   {
>> @@ -32,22 +33,9 @@ static uint64_t get_cycles(uint64_t icount_delta)
>>                       NANOSECONDS_PER_SECOND);
>>   }
>>   
>> -static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
>> -                                    uint64_t icount_delta)
>> -{
>> -    env->spr[sprn] += icount_delta;
>> -}
>> -
>> -static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>> -                              uint64_t icount_delta)
>> -{
>> -    env->spr[sprn] += get_cycles(icount_delta);
>> -}
>> -
>> -static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>> -                                        uint64_t icount_delta)
>> +static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
>>   {
>> -    int event;
>> +    int event = 0x0;
>>   
>>       switch (sprn) {
>>       case SPR_POWER_PMC1:
>> @@ -65,11 +53,35 @@ static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>>       case SPR_POWER_PMC4:
>>           event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
>>           break;
>> +    case SPR_POWER_PMC5:
>> +        event = 0x2;
>> +        break;
>> +    case SPR_POWER_PMC6:
>> +        event = 0x1E;
>> +        break;
> 
> This looks like a nice cleanup that would be better folded into an
> earlier patch.
> 
>>       default:
>> -        return;
>> +        break;
>>       }
>>   
>> -    switch (event) {
>> +    return event;
>> +}
>> +
>> +static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
>> +                                    uint64_t icount_delta)
>> +{
>> +    env->spr[sprn] += icount_delta;
>> +}
>> +
>> +static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>> +                              uint64_t icount_delta)
>> +{
>> +    env->spr[sprn] += get_cycles(icount_delta);
>> +}
>> +
>> +static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>> +                                        uint64_t icount_delta)
>> +{
>> +    switch (get_PMC_event(env, sprn)) {
>>       case 0x2:
>>           update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
>>           break;
>> @@ -99,30 +111,57 @@ static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
>>       update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
>>   }
>>   
>> +static void set_PMU_excp_timer(CPUPPCState *env)
>> +{
>> +    uint64_t timeout, now, remaining_val;
>> +
>> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
>> +        return;
>> +    }
>> +
>> +    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
>> +
>> +    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
>> +    case 0x2:
>> +        timeout = icount_to_ns(remaining_val);
>> +        break;
>> +    case 0x1e:
>> +        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
>> +                           PPC_CPU_FREQ);
> 
> So.. this appears to be simulating to the guest that cycles are
> occurring at a constant rate, consistent with the advertised CPU
> frequency.  Which sounds right, except... it's not clear to me that
> you're using the same logic to generate the values you read from the
> cycles PMC (in that case it shouldn't need to reference icount at all,
> right?).

The meaning of 'remaining_val' depends on the event being sampled in the PMC
at that moment. PMCs 1 to 4 can have a multitude of events, PMC5 always
counts instructions, and PMC6 always counts cycles.

For 0x02, env->spr[SPR_POWER_PMC1] contains instructions. remaining_val is
the remaining insns for the counter negative condition, and icount_to_ns()
is the timeout estimation for that. The value of the PMC1 will be set
via update_PMC_PM_INST_CMPL(), which in turn is just a matter of summing
the elapsed icount delta between start and freeze into the PMC.

For 0x1e, env->spr[SPR_POWER_PMC1] contains cycles. remaining_val is
the remaining cycles for counter negative cycles, and this muldiv64 calc
is the timeout estimation in this case. The PMC value is set via
update_PMC_PM_CYC(), which in turn does a math with the current icount
delta in get_cycles(icount_delta) to get the current PMC value.

All the metrics implemented in this PMU rely on 'icount_delta', the
amount of icount units between the change states of MMCR0_FC (and other
freeze counter bits like patch 17).


Thanks,


Daniel



> 
>> +        break;
>> +    default:
>> +        return;
>> +    }
>> +
>> +    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
>> +
>> +    timer_mod(env->pmu_intr_timer, now + timeout);
>> +}
>> +
>>   static void cpu_ppc_pmu_timer_cb(void *opaque)
>>   {
>>       PowerPCCPU *cpu = opaque;
>>       CPUPPCState *env = &cpu->env;
>> -    uint64_t mmcr0;
>> -
>> -    mmcr0 = env->spr[SPR_POWER_MMCR0];
>> -    if (env->spr[SPR_POWER_MMCR0] & MMCR0_EBE) {
>> -        /* freeeze counters if needed */
>> -        if (mmcr0 & MMCR0_FCECE) {
>> -            mmcr0 &= ~MMCR0_FCECE;
>> -            mmcr0 |= MMCR0_FC;
>> -        }
>> +    uint64_t icount_delta = (uint64_t)icount_get_raw() - env->pmu_base_icount;
>>   
>> -        /* Clear PMAE and set PMAO */
>> -        if (mmcr0 & MMCR0_PMAE) {
>> -            mmcr0 &= ~MMCR0_PMAE;
>> -            mmcr0 |= MMCR0_PMAO;
>> -        }
>> -        env->spr[SPR_POWER_MMCR0] = mmcr0;
>> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_EBE)) {
>> +        return;
>> +    }
>> +
>> +    update_PMCs(env, icount_delta);
>> +
>> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCECE) {
>> +        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_FCECE;
>> +        env->spr[SPR_POWER_MMCR0] |= MMCR0_FC;
>> +    }
>>   
>> -        /* Fire the PMC hardware exception */
>> -        ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
>> +    if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE) {
>> +        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_PMAE;
>> +        env->spr[SPR_POWER_MMCR0] |= MMCR0_PMAO;
>>       }
>> +
>> +    /* Fire the PMC hardware exception */
>> +    ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
>>   }
>>   
>>   void cpu_ppc_pmu_timer_init(CPUPPCState *env)
>> @@ -134,12 +173,19 @@ void cpu_ppc_pmu_timer_init(CPUPPCState *env)
>>       env->pmu_intr_timer = timer;
>>   }
>>   
>> +static bool mmcr0_counter_neg_cond_enabled(uint64_t mmcr0)
>> +{
>> +    return mmcr0 & MMCR0_PMC1CE;
>> +}
>> +
>>   void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>>   {
>>       uint64_t curr_icount = (uint64_t)icount_get_raw();
>>       bool curr_FC = env->spr[SPR_POWER_MMCR0] & MMCR0_FC;
>>       bool new_FC = value & MMCR0_FC;
>>   
>> +    env->spr[SPR_POWER_MMCR0] = value;
>> +
>>       /*
>>        * In an frozen count (FC) bit change:
>>        *
>> @@ -163,10 +209,19 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
>>                * until the freeze.
>>                */
>>               update_PMCs(env, icount_delta);
>> +
>> +            /* delete pending timer */
>> +            timer_del(env->pmu_intr_timer);
>>           } else {
>>               env->pmu_base_icount = curr_icount;
>> +
>> +            /*
>> +             * Start performance monitor alert timer for counter negative
>> +             * events, if needed.
>> +             */
>> +            if (mmcr0_counter_neg_cond_enabled(env->spr[SPR_POWER_MMCR0])) {
>> +                set_PMU_excp_timer(env);
>> +            }
>>           }
>>       }
>> -
>> -    env->spr[SPR_POWER_MMCR0] = value;
>>   }
>
David Gibson Aug. 11, 2021, 3:40 a.m. UTC | #3
On Tue, Aug 10, 2021 at 05:26:09PM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/10/21 1:01 AM, David Gibson wrote:
> > On Mon, Aug 09, 2021 at 10:10:50AM -0300, Daniel Henrique Barboza wrote:
> > > This patch starts the counter negative EBB support by enabling PMC1
> > > counter negative condition.
> > > 
> > > A counter negative condition happens when a performance monitor counter
> > > reaches the value 0x80000000. When that happens, if a counter negative
> > > condition is enabled in that counter, a performance monitor alert is
> > > triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.
> > > 
> > > An icount-based logic is used to predict when we need to wake up the timer
> > > to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
> > > The timer callback will then trigger a PPC_INTERRUPT_PMC which will become a
> > > event-based exception later.
> > > 
> > > Some EBB powerpc kernel selftests are passing after this patch, but a
> > > substancial amount of them relies on other PMCs to be enabled and events
> > > that we don't support at this moment. We'll address that in the next
> > > patches.
> > > 
> > > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > > ---
> > >   target/ppc/cpu.h               |   1 +
> > >   target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
> > >   2 files changed, 92 insertions(+), 36 deletions(-)
> > > 
> > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > > index 1d38b8cf7a..5c81d459f4 100644
> > > --- a/target/ppc/cpu.h
> > > +++ b/target/ppc/cpu.h
> > > @@ -350,6 +350,7 @@ typedef struct ppc_v3_pate_t {
> > >   #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
> > >   #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
> > >   #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
> > > +#define MMCR0_PMC1CE PPC_BIT(48)
> > >   #define MMCR1_PMC1SEL_SHIFT (63 - 39)
> > >   #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
> > > diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
> > > index 43cc0eb722..58ae65e22b 100644
> > > --- a/target/ppc/pmu_book3s_helper.c
> > > +++ b/target/ppc/pmu_book3s_helper.c
> > > @@ -25,6 +25,7 @@
> > >    * and SPAPR code.
> > >    */
> > >   #define PPC_CPU_FREQ 1000000000
> > > +#define COUNTER_NEGATIVE_VAL 0x80000000
> > >   static uint64_t get_cycles(uint64_t icount_delta)
> > >   {
> > > @@ -32,22 +33,9 @@ static uint64_t get_cycles(uint64_t icount_delta)
> > >                       NANOSECONDS_PER_SECOND);
> > >   }
> > > -static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> > > -                                    uint64_t icount_delta)
> > > -{
> > > -    env->spr[sprn] += icount_delta;
> > > -}
> > > -
> > > -static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> > > -                              uint64_t icount_delta)
> > > -{
> > > -    env->spr[sprn] += get_cycles(icount_delta);
> > > -}
> > > -
> > > -static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > > -                                        uint64_t icount_delta)
> > > +static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
> > >   {
> > > -    int event;
> > > +    int event = 0x0;
> > >       switch (sprn) {
> > >       case SPR_POWER_PMC1:
> > > @@ -65,11 +53,35 @@ static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > >       case SPR_POWER_PMC4:
> > >           event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
> > >           break;
> > > +    case SPR_POWER_PMC5:
> > > +        event = 0x2;
> > > +        break;
> > > +    case SPR_POWER_PMC6:
> > > +        event = 0x1E;
> > > +        break;
> > 
> > This looks like a nice cleanup that would be better folded into an
> > earlier patch.
> > 
> > >       default:
> > > -        return;
> > > +        break;
> > >       }
> > > -    switch (event) {
> > > +    return event;
> > > +}
> > > +
> > > +static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> > > +                                    uint64_t icount_delta)
> > > +{
> > > +    env->spr[sprn] += icount_delta;
> > > +}
> > > +
> > > +static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> > > +                              uint64_t icount_delta)
> > > +{
> > > +    env->spr[sprn] += get_cycles(icount_delta);
> > > +}
> > > +
> > > +static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > > +                                        uint64_t icount_delta)
> > > +{
> > > +    switch (get_PMC_event(env, sprn)) {
> > >       case 0x2:
> > >           update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
> > >           break;
> > > @@ -99,30 +111,57 @@ static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
> > >       update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
> > >   }
> > > +static void set_PMU_excp_timer(CPUPPCState *env)
> > > +{
> > > +    uint64_t timeout, now, remaining_val;
> > > +
> > > +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
> > > +        return;
> > > +    }
> > > +
> > > +    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
> > > +
> > > +    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
> > > +    case 0x2:
> > > +        timeout = icount_to_ns(remaining_val);
> > > +        break;
> > > +    case 0x1e:
> > > +        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
> > > +                           PPC_CPU_FREQ);
> > 
> > So.. this appears to be simulating to the guest that cycles are
> > occurring at a constant rate, consistent with the advertised CPU
> > frequency.  Which sounds right, except... it's not clear to me that
> > you're using the same logic to generate the values you read from the
> > cycles PMC (in that case it shouldn't need to reference icount at all,
> > right?).
> 
> 'remaining_val' meaning depends on the event being sampled in the PMC
> in that moment. PMCs 1 to 4 can have a multitude of events, PMC5 is always
> count instructions and PMC6 is always counting cycles.
> 
> For 0x02, env->spr[SPR_POWER_PMC1] contains instructions. remaining_val is
> the remaining insns for the counter negative condition, and icount_to_ns()
> is the timeout estimation for that. The value of the PMC1 will be set
> via update_PMC_PM_INST_CMPL(), which in turn is just a matter of summing
> the elapsed icount delta between start and freeze into the PMC.
> 
> For 0x1e, env->spr[SPR_POWER_PMC1] contains cycles. remaining_val is
> the remaining cycles for counter negative cycles, and this muldiv64 calc
> is the timeout estimation in this case. The PMC value is set via
> update_PMC_PM_CYC(), which in turn does a math with the current icount
> delta in get_cycles(icount_delta) to get the current PMC value.
> 
> All the metrics implemented in this PMU relies on 'icount_delta', the
> amount of icount units between the change states of MMCR0_FC (and other
> freeze counter bits like patch 17).

Ah, sorry, I missed that the PMC value (and therefore remaining val)
was based on the icount delta.

So.. that's consistent, but what is the justification for using
something based on icount for cycles, rather than something based on time?
Daniel Henrique Barboza Aug. 11, 2021, 11:18 a.m. UTC | #4
On 8/11/21 12:40 AM, David Gibson wrote:
> On Tue, Aug 10, 2021 at 05:26:09PM -0300, Daniel Henrique Barboza wrote:
>>
>>
>> On 8/10/21 1:01 AM, David Gibson wrote:
>>> On Mon, Aug 09, 2021 at 10:10:50AM -0300, Daniel Henrique Barboza wrote:
>>>> This patch starts the counter negative EBB support by enabling PMC1
>>>> counter negative condition.
>>>>
>>>> A counter negative condition happens when a performance monitor counter
>>>> reaches the value 0x80000000. When that happens, if a counter negative
>>>> condition is enabled in that counter, a performance monitor alert is
>>>> triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.
>>>>
>>>> An icount-based logic is used to predict when we need to wake up the timer
>>>> to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
>>>> The timer callback will then trigger a PPC_INTERRUPT_PMC which will become a
>>>> event-based exception later.
>>>>
>>>> Some EBB powerpc kernel selftests are passing after this patch, but a
>>>> substancial amount of them relies on other PMCs to be enabled and events
>>>> that we don't support at this moment. We'll address that in the next
>>>> patches.
>>>>
>>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>>>> ---
>>>>    target/ppc/cpu.h               |   1 +
>>>>    target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
>>>>    2 files changed, 92 insertions(+), 36 deletions(-)
>>>>
>>>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>>>> index 1d38b8cf7a..5c81d459f4 100644
>>>> --- a/target/ppc/cpu.h
>>>> +++ b/target/ppc/cpu.h
>>>> @@ -350,6 +350,7 @@ typedef struct ppc_v3_pate_t {
>>>>    #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
>>>>    #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
>>>>    #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
>>>> +#define MMCR0_PMC1CE PPC_BIT(48)
>>>>    #define MMCR1_PMC1SEL_SHIFT (63 - 39)
>>>>    #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
>>>> diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
>>>> index 43cc0eb722..58ae65e22b 100644
>>>> --- a/target/ppc/pmu_book3s_helper.c
>>>> +++ b/target/ppc/pmu_book3s_helper.c
>>>> @@ -25,6 +25,7 @@
>>>>     * and SPAPR code.
>>>>     */
>>>>    #define PPC_CPU_FREQ 1000000000
>>>> +#define COUNTER_NEGATIVE_VAL 0x80000000
>>>>    static uint64_t get_cycles(uint64_t icount_delta)
>>>>    {
>>>> @@ -32,22 +33,9 @@ static uint64_t get_cycles(uint64_t icount_delta)
>>>>                        NANOSECONDS_PER_SECOND);
>>>>    }
>>>> -static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
>>>> -                                    uint64_t icount_delta)
>>>> -{
>>>> -    env->spr[sprn] += icount_delta;
>>>> -}
>>>> -
>>>> -static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>>>> -                              uint64_t icount_delta)
>>>> -{
>>>> -    env->spr[sprn] += get_cycles(icount_delta);
>>>> -}
>>>> -
>>>> -static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>>>> -                                        uint64_t icount_delta)
>>>> +static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
>>>>    {
>>>> -    int event;
>>>> +    int event = 0x0;
>>>>        switch (sprn) {
>>>>        case SPR_POWER_PMC1:
>>>> @@ -65,11 +53,35 @@ static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>>>>        case SPR_POWER_PMC4:
>>>>            event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
>>>>            break;
>>>> +    case SPR_POWER_PMC5:
>>>> +        event = 0x2;
>>>> +        break;
>>>> +    case SPR_POWER_PMC6:
>>>> +        event = 0x1E;
>>>> +        break;
>>>
>>> This looks like a nice cleanup that would be better folded into an
>>> earlier patch.
>>>
>>>>        default:
>>>> -        return;
>>>> +        break;
>>>>        }
>>>> -    switch (event) {
>>>> +    return event;
>>>> +}
>>>> +
>>>> +static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
>>>> +                                    uint64_t icount_delta)
>>>> +{
>>>> +    env->spr[sprn] += icount_delta;
>>>> +}
>>>> +
>>>> +static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
>>>> +                              uint64_t icount_delta)
>>>> +{
>>>> +    env->spr[sprn] += get_cycles(icount_delta);
>>>> +}
>>>> +
>>>> +static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
>>>> +                                        uint64_t icount_delta)
>>>> +{
>>>> +    switch (get_PMC_event(env, sprn)) {
>>>>        case 0x2:
>>>>            update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
>>>>            break;
>>>> @@ -99,30 +111,57 @@ static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
>>>>        update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
>>>>    }
>>>> +static void set_PMU_excp_timer(CPUPPCState *env)
>>>> +{
>>>> +    uint64_t timeout, now, remaining_val;
>>>> +
>>>> +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
>>>> +
>>>> +    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
>>>> +    case 0x2:
>>>> +        timeout = icount_to_ns(remaining_val);
>>>> +        break;
>>>> +    case 0x1e:
>>>> +        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
>>>> +                           PPC_CPU_FREQ);
>>>
>>> So.. this appears to be simulating to the guest that cycles are
>>> occurring at a constant rate, consistent with the advertised CPU
>>> frequency.  Which sounds right, except... it's not clear to me that
>>> you're using the same logic to generate the values you read from the
>>> cycles PMC (in that case it shouldn't need to reference icount at all,
>>> right?).
>>
>> 'remaining_val' meaning depends on the event being sampled in the PMC
>> in that moment. PMCs 1 to 4 can have a multitude of events, PMC5 is always
>> count instructions and PMC6 is always counting cycles.
>>
>> For 0x02, env->spr[SPR_POWER_PMC1] contains instructions. remaining_val is
>> the remaining insns for the counter negative condition, and icount_to_ns()
>> is the timeout estimation for that. The value of the PMC1 will be set
>> via update_PMC_PM_INST_CMPL(), which in turn is just a matter of summing
>> the elapsed icount delta between start and freeze into the PMC.
>>
>> For 0x1e, env->spr[SPR_POWER_PMC1] contains cycles. remaining_val is
>> the remaining cycles for counter negative cycles, and this muldiv64 calc
>> is the timeout estimation in this case. The PMC value is set via
>> update_PMC_PM_CYC(), which in turn does a math with the current icount
>> delta in get_cycles(icount_delta) to get the current PMC value.
>>
>> All the metrics implemented in this PMU relies on 'icount_delta', the
>> amount of icount units between the change states of MMCR0_FC (and other
>> freeze counter bits like patch 17).
> 
> Ah, sorry, I missed that the PMC value (and therefore remaining val)
> was based on the icount delta.
> 
> So.. that's consistent, but what is the justification for using
> something based on icount for cycles, rather than something based on time?


We could calculate the cycles via time, given that the clock we're using
is fixed at 1GHz, so 1ns => 1 cycle. For that, we would need a mechanic similar
to what we already do with icount - get a time_base, cycles would be calculated
via a time_delta, etc.

However, and commenting a bit on Richard's review in patch 08, the cycle
calculation we're doing is just returning icount_to_ns(icount_delta) because
PPC_CPU_FREQ and NANOSECONDS_PER_SECOND are the same value. So, given that the
conditions in which we would need to store and calculate a time delta is the
same as what we're already doing with icount today, isn't
cycles = icount_to_ns(icount_delta) = time_delta?

I mean, nothing is stopping us from calculating cycles using time, but in the
end we would do the same thing we're already doing today.


Thanks,


Daniel
>
David Gibson Aug. 12, 2021, 3:39 a.m. UTC | #5
On Wed, Aug 11, 2021 at 08:18:29AM -0300, Daniel Henrique Barboza wrote:
> 
> 
> On 8/11/21 12:40 AM, David Gibson wrote:
> > On Tue, Aug 10, 2021 at 05:26:09PM -0300, Daniel Henrique Barboza wrote:
> > > 
> > > 
> > > On 8/10/21 1:01 AM, David Gibson wrote:
> > > > On Mon, Aug 09, 2021 at 10:10:50AM -0300, Daniel Henrique Barboza wrote:
> > > > > This patch starts the counter negative EBB support by enabling PMC1
> > > > > counter negative condition.
> > > > > 
> > > > > A counter negative condition happens when a performance monitor counter
> > > > > reaches the value 0x80000000. When that happens, if a counter negative
> > > > > condition is enabled in that counter, a performance monitor alert is
> > > > > triggered. For PMC1, this condition is enabled by MMCR0_PMC1CE.
> > > > > 
> > > > > An icount-based logic is used to predict when we need to wake up the timer
> > > > > to trigger the alert in both PM_INST_CMPL (0x2) and PM_CYC (0x1E) events.
> > > > > The timer callback will then trigger a PPC_INTERRUPT_PMC which will become a
> > > > > event-based exception later.
> > > > > 
> > > > > Some EBB powerpc kernel selftests are passing after this patch, but a
> > > > > substantial amount of them rely on other PMCs to be enabled and events
> > > > > that we don't support at this moment. We'll address that in the next
> > > > > patches.
> > > > > 
> > > > > Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
> > > > > ---
> > > > >    target/ppc/cpu.h               |   1 +
> > > > >    target/ppc/pmu_book3s_helper.c | 127 +++++++++++++++++++++++----------
> > > > >    2 files changed, 92 insertions(+), 36 deletions(-)
> > > > > 
> > > > > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > > > > index 1d38b8cf7a..5c81d459f4 100644
> > > > > --- a/target/ppc/cpu.h
> > > > > +++ b/target/ppc/cpu.h
> > > > > @@ -350,6 +350,7 @@ typedef struct ppc_v3_pate_t {
> > > > >    #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
> > > > >    #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
> > > > >    #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
> > > > > +#define MMCR0_PMC1CE PPC_BIT(48)
> > > > >    #define MMCR1_PMC1SEL_SHIFT (63 - 39)
> > > > >    #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
> > > > > diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
> > > > > index 43cc0eb722..58ae65e22b 100644
> > > > > --- a/target/ppc/pmu_book3s_helper.c
> > > > > +++ b/target/ppc/pmu_book3s_helper.c
> > > > > @@ -25,6 +25,7 @@
> > > > >     * and SPAPR code.
> > > > >     */
> > > > >    #define PPC_CPU_FREQ 1000000000
> > > > > +#define COUNTER_NEGATIVE_VAL 0x80000000
> > > > >    static uint64_t get_cycles(uint64_t icount_delta)
> > > > >    {
> > > > > @@ -32,22 +33,9 @@ static uint64_t get_cycles(uint64_t icount_delta)
> > > > >                        NANOSECONDS_PER_SECOND);
> > > > >    }
> > > > > -static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> > > > > -                                    uint64_t icount_delta)
> > > > > -{
> > > > > -    env->spr[sprn] += icount_delta;
> > > > > -}
> > > > > -
> > > > > -static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> > > > > -                              uint64_t icount_delta)
> > > > > -{
> > > > > -    env->spr[sprn] += get_cycles(icount_delta);
> > > > > -}
> > > > > -
> > > > > -static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > > > > -                                        uint64_t icount_delta)
> > > > > +static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
> > > > >    {
> > > > > -    int event;
> > > > > +    int event = 0x0;
> > > > >        switch (sprn) {
> > > > >        case SPR_POWER_PMC1:
> > > > > @@ -65,11 +53,35 @@ static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > > > >        case SPR_POWER_PMC4:
> > > > >            event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
> > > > >            break;
> > > > > +    case SPR_POWER_PMC5:
> > > > > +        event = 0x2;
> > > > > +        break;
> > > > > +    case SPR_POWER_PMC6:
> > > > > +        event = 0x1E;
> > > > > +        break;
> > > > 
> > > > This looks like a nice cleanup that would be better folded into an
> > > > earlier patch.
> > > > 
> > > > >        default:
> > > > > -        return;
> > > > > +        break;
> > > > >        }
> > > > > -    switch (event) {
> > > > > +    return event;
> > > > > +}
> > > > > +
> > > > > +static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
> > > > > +                                    uint64_t icount_delta)
> > > > > +{
> > > > > +    env->spr[sprn] += icount_delta;
> > > > > +}
> > > > > +
> > > > > +static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
> > > > > +                              uint64_t icount_delta)
> > > > > +{
> > > > > +    env->spr[sprn] += get_cycles(icount_delta);
> > > > > +}
> > > > > +
> > > > > +static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
> > > > > +                                        uint64_t icount_delta)
> > > > > +{
> > > > > +    switch (get_PMC_event(env, sprn)) {
> > > > >        case 0x2:
> > > > >            update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
> > > > >            break;
> > > > > @@ -99,30 +111,57 @@ static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
> > > > >        update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
> > > > >    }
> > > > > +static void set_PMU_excp_timer(CPUPPCState *env)
> > > > > +{
> > > > > +    uint64_t timeout, now, remaining_val;
> > > > > +
> > > > > +    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
> > > > > +        return;
> > > > > +    }
> > > > > +
> > > > > +    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
> > > > > +
> > > > > +    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
> > > > > +    case 0x2:
> > > > > +        timeout = icount_to_ns(remaining_val);
> > > > > +        break;
> > > > > +    case 0x1e:
> > > > > +        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
> > > > > +                           PPC_CPU_FREQ);
> > > > 
> > > > So.. this appears to be simulating to the guest that cycles are
> > > > occurring at a constant rate, consistent with the advertised CPU
> > > > frequency.  Which sounds right, except... it's not clear to me that
> > > > you're using the same logic to generate the values you read from the
> > > > cycles PMC (in that case it shouldn't need to reference icount at all,
> > > > right?).
> > > 
> > > 'remaining_val' meaning depends on the event being sampled in the PMC
> > > in that moment. PMCs 1 to 4 can have a multitude of events, PMC5 is always
> > > count instructions and PMC6 is always counting cycles.
> > > 
> > > For 0x02, env->spr[SPR_POWER_PMC1] contains instructions. remaining_val is
> > > the remaining insns for the counter negative condition, and icount_to_ns()
> > > is the timeout estimation for that. The value of the PMC1 will be set
> > > via update_PMC_PM_INST_CMPL(), which in turn is just a matter of summing
> > > the elapsed icount delta between start and freeze into the PMC.
> > > 
> > > For 0x1e, env->spr[SPR_POWER_PMC1] contains cycles. remaining_val is
> > > the remaining cycles for counter negative cycles, and this muldiv64 calc
> > > is the timeout estimation in this case. The PMC value is set via
> > > update_PMC_PM_CYC(), which in turn does a math with the current icount
> > > delta in get_cycles(icount_delta) to get the current PMC value.
> > > 
> > > All the metrics implemented in this PMU relies on 'icount_delta', the
> > > amount of icount units between the change states of MMCR0_FC (and other
> > > freeze counter bits like patch 17).
> > 
> > Ah, sorry, I missed that the PMC value (and therefore remaining val)
> > was based on the icount delta.
> > 
> > So.. that's consistent, but what is the justification for using
> > something based on icount for cycles, rather than something based on time?
> 
> 
> We could calculate the cycles via time granted that the clock we're using
> is fixed in 1Ghz, so 1ns => 1 cycle. For that, we would need a similar mechanic
> to what we already do with icount - get a time_base, cycles would be calculated
> via a time_delta, etc.
> 
> However, and commenting a bit on Richard's review in patch 08, the cycle
> calculation we're doing is just returning icount_to_ns(icount_delta) because
> PPC_CPU_FREQ and NANOSECONDS_PER_SECOND are the same value. So, given that the
> conditions in which we would need to store and calculate a time delta is the
> same as what we're already doing with icount today, isn't
> cycles = icount_to_ns(icount_delta) = time_delta?
> 
> I mean, nothing is stopping us from calculating cycles using time, but in the
> end we would do the same thing we're already doing today.

Oh.. ok.  I had assumed that icount worked by instrumenting the
generate TCG code to actually count instructions, rather than working
off the time.
Richard Henderson Aug. 12, 2021, 4:45 a.m. UTC | #6
On 8/11/21 5:39 PM, David Gibson wrote:
>> I mean, nothing is stopping us from calculating cycles using time, but in the
>> end we would do the same thing we're already doing today.
> 
> Oh.. ok.  I had assumed that icount worked by instrumenting the
> generate TCG code to actually count instructions, rather than working
> off the time.

David, you're right, icount instruments the generated tcg code.
You also have to add -icount to the command-line.


r~
Richard Henderson Aug. 12, 2021, 4:56 a.m. UTC | #7
On 8/11/21 6:45 PM, Richard Henderson wrote:
> On 8/11/21 5:39 PM, David Gibson wrote:
>>> I mean, nothing is stopping us from calculating cycles using time, but in the
>>> end we would do the same thing we're already doing today.
>>
>> Oh.. ok.  I had assumed that icount worked by instrumenting the
>> generate TCG code to actually count instructions, rather than working
>> off the time.
> 
> David, you're right, icount instruments the generated tcg code.
> You also have to add -icount to the command-line.

Oh, and btw, icount disables multi-threaded tcg, so you're going to be running that guest 
in round-robin mode.

Icount affects so many aspects of qemu that I really do not think it is the best option 
for a PMU.

If you want to count instructions retired, then just do that.  Stuff values into 
tcg_gen_insn_start so they're available for exception unwind, and otherwise decrement the 
counter at the end of a TB.

If you really must interrupt exactly at 0 (and not simply at some point past underflow), 
then we can adjust the tb lookup logic to let you reduce tb->cflags.CF_COUNT_MASK in 
cpu_get_tb_cpu_state.


r~
Daniel Henrique Barboza Aug. 12, 2021, 10:17 a.m. UTC | #8
On 8/12/21 1:56 AM, Richard Henderson wrote:
> On 8/11/21 6:45 PM, Richard Henderson wrote:
>> On 8/11/21 5:39 PM, David Gibson wrote:
>>>> I mean, nothing is stopping us from calculating cycles using time, but in the
>>>> end we would do the same thing we're already doing today.
>>>
>>> Oh.. ok.  I had assumed that icount worked by instrumenting the
>>> generate TCG code to actually count instructions, rather than working
>>> off the time.
>>
>> David, you're right, icount instruments the generated tcg code.
>> You also have to add -icount to the command-line.
> 
> Oh, and btw, icount disables multi-threaded tcg, so you're going to be running that guest in round-robin mode.
> 
> Icount affects so many aspects of qemu that I really do not think it is the best option for a PMU.

Using icount in the PMU is my fallback plan. I spent some time trying to
count instructions using TranslatorOps but never got it right. I got
up to a point where the tests were counting instructions the first
time they were run in the guest, then nothing else was counted in consecutive
runs.

I was able to figure out that it had to do with how the translation block
works. If a previously run TB was found via lookup, then the TranslatorOps
callbacks I was using weren't being called. I know that I was missing a
piece of info there, but since I'm trying to deal with other aspects of the
PMU logic I fell back to using icount to get things off the ground.

All this said ....

> 
> If you want to count instructions retired, then just do that.  Stuff values into tcg_gen_insn_start so they're available for exception unwind, and otherwise decrement the counter at the end of a TB.

... I don't mind giving this a try. Can you clarify what you mean
by "exception unwind"?

I tried something similar with tcg_gen_insn_start() (called via ppc_tr_insn_start()).
Since this op is called inside translator_loop(), and translator_loop() isn't
being run if tb_lookup() returns the TB, I was observing the behavior I
described above of a test counting instructions in its first run only.


> 
> If you really must interrupt exactly at 0 (and not simply at some point past underflow), then we can adjust the tb lookup logic to let you reduce tb->cflags.CF_COUNT_MASK in cpu_get_tb_cpu_state.

That would be good, but depending on the amount of work I would consider doing
this as a follow-up of this work. It's ok if the PMU overshoots the instruction
count a bit for our current purposes ATM.


Thanks,


Daniel

> 
> 
> r~
Daniel Henrique Barboza Aug. 12, 2021, 9:24 p.m. UTC | #9
On 8/12/21 7:17 AM, Daniel Henrique Barboza wrote:
> 
> 
> On 8/12/21 1:56 AM, Richard Henderson wrote:
>> On 8/11/21 6:45 PM, Richard Henderson wrote:
>>> On 8/11/21 5:39 PM, David Gibson wrote:
>>>>> I mean, nothing is stopping us from calculating cycles using time, but in the
>>>>> end we would do the same thing we're already doing today.
>>>>
>>>> Oh.. ok.  I had assumed that icount worked by instrumenting the
>>>> generate TCG code to actually count instructions, rather than working
>>>> off the time.
>>>
>>> David, you're right, icount instruments the generated tcg code.
>>> You also have to add -icount to the command-line.
>>
>> Oh, and btw, icount disables multi-threaded tcg, so you're going to be running that guest in round-robin mode.
>>
>> Icount affects so many aspects of qemu that I really do not think it is the best option for a PMU.
> 
> Using icount in the PMU is my fallback plan. I spent some time trying to
> count instructions using translationOps but never got it right. I got
> up to a point where the tests were counting instructions for the first
> time it was run in the guest, then nothing else counted in consecutive
> runs.
> 
> I was able to figure out that it had to do with how the translation block
> works. If a previously ran TB was found via lookup then the translationOps
> callbacks I was using weren't being called. I know that I was missing a
> piece of info there, but since I'm trying to deal with other aspects of the
> PMU logic I fell back into using icount to get things of the ground.


So, I made an attempt to remove all icount() references from the PMU code and instead
did the following (posting just the relevant diff):


+
+void helper_insns_inc(CPUPPCState *env)
+{
+    env->pmu_insns_count++;
+}
+
+void helper_insns_dec(CPUPPCState *env)
+{
+    env->pmu_insns_count--;
+}
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 60f35360b7..c56c656694 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -8689,6 +8689,7 @@ static void ppc_tr_tb_start(DisasContextBase *db, CPUState *cs)
  
  static void ppc_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
  {
+    gen_helper_insns_inc(cpu_env);
      tcg_gen_insn_start(dcbase->pc_next);
  }
  
@@ -8755,6 +8756,8 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
          return;
      }
  
+    gen_helper_insns_dec(cpu_env);
+
      /* Honor single stepping. */
      sse = ctx->singlestep_enabled & (CPU_SINGLE_STEP | GDBSTUB_SINGLE_STEP);
      if (unlikely(sse)) {


And then I used 'env->pmu_insns_count' to update the instruction counters. And it's
working, with a slightly better performance than we have with the icount()
version. I'm not sure why it's working now since I tried something very similar
before and it didn't. Figures.

It's still overshooting the instructions a bit, but then there's the optimization
you mentioned previously with the tb lookup logic that could be done to improve
that as well.


I'm not sure if this is in line with or close to what you proposed, but it's already
better than the icount version that I posted here.


Thanks,


Daniel


> 
> All this said ....
> 
>>
>> If you want to count instructions retired, then just do that.  Stuff values into tcg_gen_insn_start so they're available for exception unwind, and otherwise decrement the counter at the end of a TB.
> 
> ... I don't bother giving this a try. Can you clarify what do you mean
> with "exception unwind"?
> 
> I tried something similar with tcg_gen_insn_start() (called via ppc_tr_insn_start()).
> This this ops is called inside translator_loop(), and translator_loop() isn't
> being ran if TB_lookup returns the TB, I was observing the behavior I
> described above of a test counting instructions in its first run only.
> 
> 
>>
>> If you really must interrupt exactly at 0 (and not simply at some point past underflow), then we can adjust the tb lookup logic to let you reduce tb->cflags.CF_COUNT_MASK in cpu_get_tb_cpu_state.
> 
> That would be good, but depending on the amount of work I would consider doing
> this is a follow-up of this work. It's ok if the PMU overflows a bit instructions
> for our current purposes ATM.
> 
> 
> Thanks,
> 
> 
> Daniel
> 
>>
>>
>> r~
Richard Henderson Aug. 13, 2021, 12:35 a.m. UTC | #10
On 8/12/21 11:24 AM, Daniel Henrique Barboza wrote:
> +void helper_insns_inc(CPUPPCState *env)
> +{
> +    env->pmu_insns_count++;
> +}
> +
> +void helper_insns_dec(CPUPPCState *env)
> +{
> +    env->pmu_insns_count--;
> +}
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 60f35360b7..c56c656694 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -8689,6 +8689,7 @@ static void ppc_tr_tb_start(DisasContextBase *db, CPUState *cs)
> 
>   static void ppc_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
>   {
> +    gen_helper_insns_inc(cpu_env);
>       tcg_gen_insn_start(dcbase->pc_next);
>   }
> 
> @@ -8755,6 +8756,8 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>           return;
>       }
> 
> +    gen_helper_insns_dec(cpu_env);
> +
>       /* Honor single stepping. */
>       sse = ctx->singlestep_enabled & (CPU_SINGLE_STEP | GDBSTUB_SINGLE_STEP);
>       if (unlikely(sse)) {
> 
> 
> And then I used 'env->pmu_insns_count' to update the instruction counters. And it's
> working, with a slightly better performance than we have with the icount()
> version. I'm not sure why it's working now since I tried something very similar
> before and it didn't. Figures.

Not sure why you're decrementing there.

Also, how do you want to count retired instructions? Ones that merely start?  E.g. 
including loads that fault?  How about illegal instructions?  Or does the instruction need 
to run to completion?  Which of these you choose affects where you place the increment.

It is of course extremely heavyweight to call a helper to perform a simple addition operation.

You may also wish to look at target/hexagon, gen_exec_counters(), which is doing the same 
sort of thing.  But not completely, since it loses count along dynamic exception paths 
(e.g. faulting load).  That can be fixed as well, via restore_state_to_opc.


r~
Daniel Henrique Barboza Aug. 14, 2021, 7:13 p.m. UTC | #11
On 8/12/21 9:35 PM, Richard Henderson wrote:
> On 8/12/21 11:24 AM, Daniel Henrique Barboza wrote:
>> +void helper_insns_inc(CPUPPCState *env)
>> +{
>> +    env->pmu_insns_count++;
>> +}
>> +
>> +void helper_insns_dec(CPUPPCState *env)
>> +{
>> +    env->pmu_insns_count--;
>> +}
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index 60f35360b7..c56c656694 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -8689,6 +8689,7 @@ static void ppc_tr_tb_start(DisasContextBase *db, CPUState *cs)
>>
>>   static void ppc_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
>>   {
>> +    gen_helper_insns_inc(cpu_env);
>>       tcg_gen_insn_start(dcbase->pc_next);
>>   }
>>
>> @@ -8755,6 +8756,8 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>>           return;
>>       }
>>
>> +    gen_helper_insns_dec(cpu_env);
>> +
>>       /* Honor single stepping. */
>>       sse = ctx->singlestep_enabled & (CPU_SINGLE_STEP | GDBSTUB_SINGLE_STEP);
>>       if (unlikely(sse)) {
>>
>>
>> And then I used 'env->pmu_insns_count' to update the instruction counters. And it's
>> working, with a slightly better performance than we have with the icount()
>> version. I'm not sure why it's working now since I tried something very similar
>> before and it didn't. Figures.
> 
> Not sure why you're decrementing there.
> 
> Also, how do you want to count retired instructions? Ones that merely start?  E.g. including loads that fault?  How about illegal instructions?  Or does the instruction need to run to completion?  Which of these you choose affects where you place the increment.

Before I proceed, let me mention that the numbers I'm about to post here are from the powerpc
selftest located here:

https://github.com/torvalds/linux/blob/master/tools/testing/selftests/powerpc/pmu/count_instructions.c

This test runs an instruction loop that consists of 'addi' instructions. Before running the instructions
there's an overhead calculation with an empty loop.


So, the event I want to count is described as "The thread has completed an instruction" in the ISA.
There are no events that count illegal instructions or that cause faults, so my guess is that
all completed instructions, regardless of side-effects, are counted.

Putting the incrementer in ppc_tr_insn_start() like I mentioned in my previous reply provides results
similar to the icount version:

# ./count_instructions
test: count_instructions
tags: git_version:v5.13-5357-gdbe69e433722-dirty
Binding to cpu 0
main test running as pid 568
Overhead of null loop: 2370 instructions
instructions: result 1002370 running/enabled 1587314
cycles: result 1001372 running/enabled 1348532
Looped for 1000000 instructions, overhead 2370
Expected 1002370
Actual   1002370
Delta    0, 0.000000%
instructions: result 10010309 running/enabled 11603340
cycles: result 10009311 running/enabled 11362216
Looped for 10000000 instructions, overhead 2370
Expected 10002370
Actual   10010309
Delta    7939, 0.079308%
[FAIL] Test FAILED on line 119
failure: count_instructions

I'm aware that putting the increment in insn_start() is too early since the instruction didn't
even complete, so I attempted to put the increment at the end of ppc_tr_translate_insn(). I
ended up with fewer instructions counted for the same test:

# ./count_instructions
test: count_instructions
tags: git_version:v5.13-5357-gdbe69e433722-dirty
Binding to cpu 0
main test running as pid 575
Overhead of null loop: 1962 instructions
instructions: result 939462 running/enabled 1587310
cycles: result 1001372 running/enabled 1348532
Looped for 1000000 instructions, overhead 1962
Expected 1001962
Actual   939462
Delta    -62500, -6.652744%
[FAIL] Test FAILED on line 116
failure: count_instructions

Reading translator_loop() I can't explain why we're missing this amount of instructions, because
translate_insn() is always called right after insn_start():


     while (true) {
         db->num_insns++;

         /* NOTE: incrementing the counter in insn_start() ppc impl gives similar result as icount*/
         ops->insn_start(db, cpu);
         tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */

        (...)

         /* NOTE: incrementing the counter in translate_insn() PPC impl misses 6% of the instructions,
         even though it's always called right after insn_start() */
         if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
             /* Accept I/O on the last instruction.  */
             gen_io_start();
             ops->translate_insn(db, cpu);
         } else {
             /* we should only see CF_MEMI_ONLY for io_recompile */
             tcg_debug_assert(!(cflags & CF_MEMI_ONLY));
             ops->translate_insn(db, cpu);
         }

I've recompiled QEMU with --enable-tcg-debug to see if tcg_debug_assert() right after insn_start() was
being hit. The assert isn't being hit in the test.


I also tried to read the instructions in tb_stop() via db->num_insns. Since I'm not worried about missing
the counter_overflow a few instructions it would also be an alternative. I changed the helper to receive
a number of instructions to be added instead of single increments:

static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
      target_ulong nip = ctx->base.pc_next;
      int sse;
  
+    gen_helper_insns_inc(cpu_env, tcg_constant_i32(dcbase->num_insns));
+
      if (is_jmp == DISAS_NORETURN) {
          /* We have already exited the TB. */
          return;


The test gives us these numbers:

# ./count_instructions
test: count_instructions
tags: git_version:v5.13-5357-gdbe69e433722-dirty
Binding to cpu 0
main test running as pid 573
Overhead of null loop: 85 instructions
instructions: result 85 running/enabled 1587316
cycles: result 1001372 running/enabled 1348534
Looped for 1000000 instructions, overhead 85
Expected 1000085
Actual   85
Delta    -1000000, -1176470.588235%
[FAIL] Test FAILED on line 116
failure: count_instructions

This is the situation I faced some time ago and commented on in a previous email. My hypothesis
back then was that translator_loop(), which is called via gen_intermediate_code() - tb_gen_code(),
was not being executed because tb_gen_code() is always guarded by a tb_lookup(), and I
assumed that the lookup was finding the already translated TB and running cpu_tb_exec() with it.


Well, we just saw that incrementing the counter in both insn_start() and translate_insn() gives
a far greater number than trying to sum up all the db->num_insns in tb_stop(), and all of
them are called inside translator_loop(). This invalidates (I guess) the assumptions I made
on tb_gen_code() up above. So yeah, I need more time to understand why I'm getting these
numbers.

For now, I'm really tempted to increment the instructions at insn_start() and come back to
instruction counting specifics in follow-up work.


> 
> It is of course extremely heavyweight to call a helper to perform a simple addition operation.

This increment would update all registers that are counting instructions and
also fire a PMU interrupt when an overflow happens. I also added the frozen
count PMU bit in 'hflags' so we can increment only if the PMU is running.


There's a good chance that most of this can be done outside the helper in a
TCG function in translate.c. That would minimize the impact of the running
PMU in the translation process. I'll see how it goes.


> 
> You may also wish to look at target/hexagon, gen_exec_counters(), which is doing the same sort of thing.  But not completely, since it loses count along dynamic exception paths (e.g. faulting load).  That can be fixed as well, via restore_state_to_opc.

I saw the hexagon code and attempted to accumulate the instructions in
a 'num_insn' ctx variable, that was incremented in insns_start(), and then
update the PMU counters in tb_stop() with it. The results were similar
to what I've described above with db->num_insns in tb_stop().



Thanks,


Daniel


> 
> 
> r~
Richard Henderson Aug. 15, 2021, 7:24 p.m. UTC | #12
On 8/14/21 9:13 AM, Daniel Henrique Barboza wrote:
> https://github.com/torvalds/linux/blob/master/tools/testing/selftests/powerpc/pmu/count_instructions.c 
> 
> 
> This test runs an instruction loop that consists of 'addi' instructions . Before running 
> the instructions
> there's an overhead calculation with an empty loop.
...
> static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
>       target_ulong nip = ctx->base.pc_next;
>       int sse;
> 
> +    gen_helper_insns_inc(cpu_env, tcg_constant_i32(dcbase->num_insns));
> +
>       if (is_jmp == DISAS_NORETURN) {
>           /* We have already exited the TB. */
>           return;

You've not considered how branches are implemented.

We generate code to exit the tb in gen_bcond.  Anything you emit after that is dead code.


r~
diff mbox series

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 1d38b8cf7a..5c81d459f4 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -350,6 +350,7 @@  typedef struct ppc_v3_pate_t {
 #define MMCR0_EBE   PPC_BIT(43)         /* Perf Monitor EBB Enable */
 #define MMCR0_FCECE PPC_BIT(38)         /* FC on Enabled Cond or Event */
 #define MMCR0_PMCC  PPC_BITMASK(44, 45) /* PMC Control */
+#define MMCR0_PMC1CE PPC_BIT(48)
 
 #define MMCR1_PMC1SEL_SHIFT (63 - 39)
 #define MMCR1_PMC1SEL PPC_BITMASK(32, 39)
diff --git a/target/ppc/pmu_book3s_helper.c b/target/ppc/pmu_book3s_helper.c
index 43cc0eb722..58ae65e22b 100644
--- a/target/ppc/pmu_book3s_helper.c
+++ b/target/ppc/pmu_book3s_helper.c
@@ -25,6 +25,7 @@ 
  * and SPAPR code.
  */
 #define PPC_CPU_FREQ 1000000000
+#define COUNTER_NEGATIVE_VAL 0x80000000
 
 static uint64_t get_cycles(uint64_t icount_delta)
 {
@@ -32,22 +33,9 @@  static uint64_t get_cycles(uint64_t icount_delta)
                     NANOSECONDS_PER_SECOND);
 }
 
-static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
-                                    uint64_t icount_delta)
-{
-    env->spr[sprn] += icount_delta;
-}
-
-static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
-                              uint64_t icount_delta)
-{
-    env->spr[sprn] += get_cycles(icount_delta);
-}
-
-static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
-                                        uint64_t icount_delta)
+static uint8_t get_PMC_event(CPUPPCState *env, int sprn)
 {
-    int event;
+    int event = 0x0;
 
     switch (sprn) {
     case SPR_POWER_PMC1:
@@ -65,11 +53,35 @@  static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
     case SPR_POWER_PMC4:
         event = MMCR1_PMC4SEL & env->spr[SPR_POWER_MMCR1];
         break;
+    case SPR_POWER_PMC5:
+        event = 0x2;
+        break;
+    case SPR_POWER_PMC6:
+        event = 0x1E;
+        break;
     default:
-        return;
+        break;
     }
 
-    switch (event) {
+    return event;
+}
+
+static void update_PMC_PM_INST_CMPL(CPUPPCState *env, int sprn,
+                                    uint64_t icount_delta)
+{
+    env->spr[sprn] += icount_delta;
+}
+
+static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
+                              uint64_t icount_delta)
+{
+    env->spr[sprn] += get_cycles(icount_delta);
+}
+
+static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,
+                                        uint64_t icount_delta)
+{
+    switch (get_PMC_event(env, sprn)) {
     case 0x2:
         update_PMC_PM_INST_CMPL(env, sprn, icount_delta);
         break;
@@ -99,30 +111,57 @@  static void update_PMCs(CPUPPCState *env, uint64_t icount_delta)
     update_PMC_PM_CYC(env, SPR_POWER_PMC6, icount_delta);
 }
 
+static void set_PMU_excp_timer(CPUPPCState *env)
+{
+    uint64_t timeout, now, remaining_val;
+
+    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMC1CE)) {
+        return;
+    }
+
+    remaining_val = COUNTER_NEGATIVE_VAL - env->spr[SPR_POWER_PMC1];
+
+    switch (get_PMC_event(env, SPR_POWER_PMC1)) {
+    case 0x2:
+        timeout = icount_to_ns(remaining_val);
+        break;
+    case 0x1e:
+        timeout = muldiv64(remaining_val, NANOSECONDS_PER_SECOND,
+                           PPC_CPU_FREQ);
+        break;
+    default:
+        return;
+    }
+
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    timer_mod(env->pmu_intr_timer, now + timeout);
+}
+
 static void cpu_ppc_pmu_timer_cb(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
     CPUPPCState *env = &cpu->env;
-    uint64_t mmcr0;
-
-    mmcr0 = env->spr[SPR_POWER_MMCR0];
-    if (env->spr[SPR_POWER_MMCR0] & MMCR0_EBE) {
-        /* freeeze counters if needed */
-        if (mmcr0 & MMCR0_FCECE) {
-            mmcr0 &= ~MMCR0_FCECE;
-            mmcr0 |= MMCR0_FC;
-        }
+    uint64_t icount_delta = (uint64_t)icount_get_raw() - env->pmu_base_icount;
 
-        /* Clear PMAE and set PMAO */
-        if (mmcr0 & MMCR0_PMAE) {
-            mmcr0 &= ~MMCR0_PMAE;
-            mmcr0 |= MMCR0_PMAO;
-        }
-        env->spr[SPR_POWER_MMCR0] = mmcr0;
+    if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_EBE)) {
+        return;
+    }
+
+    update_PMCs(env, icount_delta);
+
+    if (env->spr[SPR_POWER_MMCR0] & MMCR0_FCECE) {
+        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_FCECE;
+        env->spr[SPR_POWER_MMCR0] |= MMCR0_FC;
+    }
 
-        /* Fire the PMC hardware exception */
-        ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
+    if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE) {
+        env->spr[SPR_POWER_MMCR0] &= ~MMCR0_PMAE;
+        env->spr[SPR_POWER_MMCR0] |= MMCR0_PMAO;
     }
+
+    /* Fire the PMC hardware exception */
+    ppc_set_irq(cpu, PPC_INTERRUPT_PMC, 1);
 }
 
 void cpu_ppc_pmu_timer_init(CPUPPCState *env)
@@ -134,12 +173,19 @@  void cpu_ppc_pmu_timer_init(CPUPPCState *env)
     env->pmu_intr_timer = timer;
 }
 
+static bool mmcr0_counter_neg_cond_enabled(uint64_t mmcr0)
+{
+    return mmcr0 & MMCR0_PMC1CE;
+}
+
 void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
 {
     uint64_t curr_icount = (uint64_t)icount_get_raw();
     bool curr_FC = env->spr[SPR_POWER_MMCR0] & MMCR0_FC;
     bool new_FC = value & MMCR0_FC;
 
+    env->spr[SPR_POWER_MMCR0] = value;
+
     /*
      * In an frozen count (FC) bit change:
      *
@@ -163,10 +209,19 @@  void helper_store_mmcr0(CPUPPCState *env, target_ulong value)
              * until the freeze.
              */
             update_PMCs(env, icount_delta);
+
+            /* delete pending timer */
+            timer_del(env->pmu_intr_timer);
         } else {
             env->pmu_base_icount = curr_icount;
+
+            /*
+             * Start performance monitor alert timer for counter negative
+             * events, if needed.
+             */
+            if (mmcr0_counter_neg_cond_enabled(env->spr[SPR_POWER_MMCR0])) {
+                set_PMU_excp_timer(env);
+            }
         }
     }
-
-    env->spr[SPR_POWER_MMCR0] = value;
 }