diff mbox series

[RFC,06/10] target/riscv: Define PMU event related structures

Message ID 20241009-pmu_event_machine-v1-6-dcbd7a60e3ba@rivosinc.com (mailing list archive)
State New
Headers show
Series Allow platform specific PMU event encoding | expand

Commit Message

Atish Kumar Patra Oct. 9, 2024, 11:09 p.m. UTC
Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 target/riscv/cpu.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Comments

Alexei Filippov Oct. 10, 2024, 12:44 p.m. UTC | #1
On 10.10.2024 02:09, Atish Patra wrote:
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>   target/riscv/cpu.h | 25 +++++++++++++++++++++++++
>   1 file changed, 25 insertions(+)
> 
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 2ac391a7cf74..53426710f73e 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -189,6 +189,28 @@ typedef struct PMUFixedCtrState {
>           uint64_t counter_virt_prev[2];
>   } PMUFixedCtrState;
>   
> +typedef uint64_t (*PMU_EVENT_CYCLE_FUNC)(RISCVCPU *);
> +typedef uint64_t (*PMU_EVENT_INSTRET_FUNC)(RISCVCPU *);
> +typedef uint64_t (*PMU_EVENT_TLB_FUNC)(RISCVCPU *, MMUAccessType access_type);
> +
> +typedef struct PMUEventInfo {
> +    /* Event ID (BIT [0:55] valid) */
> +    uint64_t event_id;
> +    /* Supported hpmcounters for this event */
> +    uint32_t counter_mask;
> +    /* Bitmask of valid event bits */
> +    uint64_t event_mask;
> +} PMUEventInfo;
> +
> +typedef struct PMUEventFunc {
> +    /* Get the ID of the event that can monitor cycles */
> +    PMU_EVENT_CYCLE_FUNC get_cycle_id;
> +    /* Get the ID of the event that can monitor cycles */
> +    PMU_EVENT_INSTRET_FUNC get_intstret_id;
> +    /* Get the ID of the event that can monitor TLB events*/
> +    PMU_EVENT_TLB_FUNC get_tlb_access_id;
Ok, this is kinda interesting decision, but it's not scalable. AFAIU 
none spec provide us full enum of existing events. So anytime when 
somebody will try to implement their own pmu events they would have to 
add additional callbacks, and this structure never will be fulled 
properly. And then we ended up with structure 1000+ callback with only 5 
machines wich supports pmu events. I suggest my approach with only 
read/write callbacks, where machine can decide by itself how to handle 
any of machine specific events.
> +} PMUEventFunc;
> +
>   struct CPUArchState {
>       target_ulong gpr[32];
>       target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
> @@ -386,6 +408,9 @@ struct CPUArchState {
>       target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
>   
>       PMUFixedCtrState pmu_fixed_ctrs[2];
> +    PMUEventInfo *pmu_events;
> +    PMUEventFunc pmu_efuncs;
> +    int num_pmu_events;
>   
>       target_ulong sscratch;
>       target_ulong mscratch;
>
Atish Kumar Patra Oct. 11, 2024, 8:45 p.m. UTC | #2
On Thu, Oct 10, 2024 at 5:44 AM Alexei Filippov
<alexei.filippov@yadro.com> wrote:
>
>
>
> On 10.10.2024 02:09, Atish Patra wrote:
> > Signed-off-by: Atish Patra <atishp@rivosinc.com>
> > ---
> >   target/riscv/cpu.h | 25 +++++++++++++++++++++++++
> >   1 file changed, 25 insertions(+)
> >
> > diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> > index 2ac391a7cf74..53426710f73e 100644
> > --- a/target/riscv/cpu.h
> > +++ b/target/riscv/cpu.h
> > @@ -189,6 +189,28 @@ typedef struct PMUFixedCtrState {
> >           uint64_t counter_virt_prev[2];
> >   } PMUFixedCtrState;
> >
> > +typedef uint64_t (*PMU_EVENT_CYCLE_FUNC)(RISCVCPU *);
> > +typedef uint64_t (*PMU_EVENT_INSTRET_FUNC)(RISCVCPU *);
> > +typedef uint64_t (*PMU_EVENT_TLB_FUNC)(RISCVCPU *, MMUAccessType access_type);
> > +
> > +typedef struct PMUEventInfo {
> > +    /* Event ID (BIT [0:55] valid) */
> > +    uint64_t event_id;
> > +    /* Supported hpmcounters for this event */
> > +    uint32_t counter_mask;
> > +    /* Bitmask of valid event bits */
> > +    uint64_t event_mask;
> > +} PMUEventInfo;
> > +
> > +typedef struct PMUEventFunc {
> > +    /* Get the ID of the event that can monitor cycles */
> > +    PMU_EVENT_CYCLE_FUNC get_cycle_id;
> > +    /* Get the ID of the event that can monitor cycles */
> > +    PMU_EVENT_INSTRET_FUNC get_intstret_id;
> > +    /* Get the ID of the event that can monitor TLB events*/
> > +    PMU_EVENT_TLB_FUNC get_tlb_access_id;
> Ok, this is kinda interesting decision, but it's not scalable. AFAIU

Yes it is not scalable if there is a need to scale as mentioned earlier.
Do you have any other events that can be emulated in Qemu ?

Having said that, I am okay with single call back though but not too sure
about read/write callback unless there is a use case to support those.

> none spec provide us full enum of existing events. So anytime when
> somebody will try to implement their own pmu events they would have to
> add additional callbacks, and this structure never will be fulled
> properly. And then we ended up with structure 1000+ callback with only 5
> machines wich supports pmu events. I suggest my approach with only
> read/write callbacks, where machine can decide by itself how to handle
> any of machine specific events.

Lot of these events are microarchitectural events which can't be
supported in Qemu.
I don't think it's a good idea at all to add dummy values for all the
events defined in a platform
which is harder to debug for a user.


> > +} PMUEventFunc;
> > +
> >   struct CPUArchState {
> >       target_ulong gpr[32];
> >       target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
> > @@ -386,6 +408,9 @@ struct CPUArchState {
> >       target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
> >
> >       PMUFixedCtrState pmu_fixed_ctrs[2];
> > +    PMUEventInfo *pmu_events;
> > +    PMUEventFunc pmu_efuncs;
> > +    int num_pmu_events;
> >
> >       target_ulong sscratch;
> >       target_ulong mscratch;
> >
Aleksei Filippov Oct. 21, 2024, 1:44 p.m. UTC | #3
> On 11 Oct 2024, at 23:45, Atish Kumar Patra <atishp@rivosinc.com> wrote:
> 
> On Thu, Oct 10, 2024 at 5:44 AM Alexei Filippov
> <alexei.filippov@yadro.com> wrote:
>> 
>> 
>> 
>> On 10.10.2024 02:09, Atish Patra wrote:
>>> Signed-off-by: Atish Patra <atishp@rivosinc.com>
>>> ---
>>>  target/riscv/cpu.h | 25 +++++++++++++++++++++++++
>>>  1 file changed, 25 insertions(+)
>>> 
>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>> index 2ac391a7cf74..53426710f73e 100644
>>> --- a/target/riscv/cpu.h
>>> +++ b/target/riscv/cpu.h
>>> @@ -189,6 +189,28 @@ typedef struct PMUFixedCtrState {
>>>          uint64_t counter_virt_prev[2];
>>>  } PMUFixedCtrState;
>>> 
>>> +typedef uint64_t (*PMU_EVENT_CYCLE_FUNC)(RISCVCPU *);
>>> +typedef uint64_t (*PMU_EVENT_INSTRET_FUNC)(RISCVCPU *);
>>> +typedef uint64_t (*PMU_EVENT_TLB_FUNC)(RISCVCPU *, MMUAccessType access_type);
>>> +
>>> +typedef struct PMUEventInfo {
>>> +    /* Event ID (BIT [0:55] valid) */
>>> +    uint64_t event_id;
>>> +    /* Supported hpmcounters for this event */
>>> +    uint32_t counter_mask;
>>> +    /* Bitmask of valid event bits */
>>> +    uint64_t event_mask;
>>> +} PMUEventInfo;
>>> +
>>> +typedef struct PMUEventFunc {
>>> +    /* Get the ID of the event that can monitor cycles */
>>> +    PMU_EVENT_CYCLE_FUNC get_cycle_id;
>>> +    /* Get the ID of the event that can monitor cycles */
>>> +    PMU_EVENT_INSTRET_FUNC get_intstret_id;
>>> +    /* Get the ID of the event that can monitor TLB events*/
>>> +    PMU_EVENT_TLB_FUNC get_tlb_access_id;
>> Ok, this is kinda interesting decision, but it's not scalable. AFAIU
> 
> Yes it is not scalable if there is a need to scale as mentioned earlier.
> Do you have any other events that can be emulated in Qemu ?
> 
> Having said that, I am okay with single call back though but not too sure
> about read/write callback unless there is a use case to support those.
> 
>> none spec provide us full enum of existing events. So anytime when
>> somebody will try to implement their own pmu events they would have to
>> add additional callbacks, and this structure never will be fulled
>> properly. And then we ended up with structure 1000+ callback with only 5
>> machines wich supports pmu events. I suggest my approach with only
>> read/write callbacks, where machine can decide by itself how to handle
>> any of machine specific events.
> 
> Lot of these events are microarchitectural events which can't be
> supported in Qemu.
> I don't think it's a good idea at all to add dummy values for all the
> events defined in a platform
> which is harder to debug for a user.

Yes, you're right that the rest of the events are microarchitectural and that they can't be properly simulated in QEMU at the moment, but it seems to me that's not really the point here. The point is how elastic this solution can be - that is, whether to do any events or not and how exactly they should be counted should be decided by the vendor of a particular machine, and not by the simulator in general. Plus, I have a very real use case where it will come in handy - debugging perf. Support the possibility of simulating events on QEMU side will make the life of t perf folks much easier. I do not insist specifically on my implementation of this solution, but I think that the solution with the creation of a callback for each of the events will significantly complicate the porting of the PMU for machine vendors.
> 
> 
>>> +} PMUEventFunc;
>>> +
>>>  struct CPUArchState {
>>>      target_ulong gpr[32];
>>>      target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
>>> @@ -386,6 +408,9 @@ struct CPUArchState {
>>>      target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
>>> 
>>>      PMUFixedCtrState pmu_fixed_ctrs[2];
>>> +    PMUEventInfo *pmu_events;
>>> +    PMUEventFunc pmu_efuncs;
>>> +    int num_pmu_events;
>>> 
>>>      target_ulong sscratch;
>>>      target_ulong mscratch;
Atish Kumar Patra Oct. 22, 2024, 12:58 p.m. UTC | #4
On Mon, Oct 21, 2024 at 6:45 AM Aleksei Filippov
<alexei.filippov@syntacore.com> wrote:
>
>
>
> > On 11 Oct 2024, at 23:45, Atish Kumar Patra <atishp@rivosinc.com> wrote:
> >
> > On Thu, Oct 10, 2024 at 5:44 AM Alexei Filippov
> > <alexei.filippov@yadro.com> wrote:
> >>
> >>
> >>
> >> On 10.10.2024 02:09, Atish Patra wrote:
> >>> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> >>> ---
> >>>  target/riscv/cpu.h | 25 +++++++++++++++++++++++++
> >>>  1 file changed, 25 insertions(+)
> >>>
> >>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> >>> index 2ac391a7cf74..53426710f73e 100644
> >>> --- a/target/riscv/cpu.h
> >>> +++ b/target/riscv/cpu.h
> >>> @@ -189,6 +189,28 @@ typedef struct PMUFixedCtrState {
> >>>          uint64_t counter_virt_prev[2];
> >>>  } PMUFixedCtrState;
> >>>
> >>> +typedef uint64_t (*PMU_EVENT_CYCLE_FUNC)(RISCVCPU *);
> >>> +typedef uint64_t (*PMU_EVENT_INSTRET_FUNC)(RISCVCPU *);
> >>> +typedef uint64_t (*PMU_EVENT_TLB_FUNC)(RISCVCPU *, MMUAccessType access_type);
> >>> +
> >>> +typedef struct PMUEventInfo {
> >>> +    /* Event ID (BIT [0:55] valid) */
> >>> +    uint64_t event_id;
> >>> +    /* Supported hpmcounters for this event */
> >>> +    uint32_t counter_mask;
> >>> +    /* Bitmask of valid event bits */
> >>> +    uint64_t event_mask;
> >>> +} PMUEventInfo;
> >>> +
> >>> +typedef struct PMUEventFunc {
> >>> +    /* Get the ID of the event that can monitor cycles */
> >>> +    PMU_EVENT_CYCLE_FUNC get_cycle_id;
> >>> +    /* Get the ID of the event that can monitor cycles */
> >>> +    PMU_EVENT_INSTRET_FUNC get_intstret_id;
> >>> +    /* Get the ID of the event that can monitor TLB events*/
> >>> +    PMU_EVENT_TLB_FUNC get_tlb_access_id;
> >> Ok, this is kinda interesting decision, but it's not scalable. AFAIU
> >
> > Yes it is not scalable if there is a need to scale as mentioned earlier.
> > Do you have any other events that can be emulated in Qemu ?
> >
> > Having said that, I am okay with single call back though but not too sure
> > about read/write callback unless there is a use case to support those.
> >
> >> none spec provide us full enum of existing events. So anytime when
> >> somebody will try to implement their own pmu events they would have to
> >> add additional callbacks, and this structure never will be fulled
> >> properly. And then we ended up with structure 1000+ callback with only 5
> >> machines wich supports pmu events. I suggest my approach with only
> >> read/write callbacks, where machine can decide by itself how to handle
> >> any of machine specific events.
> >
> > Lot of these events are microarchitectural events which can't be
> > supported in Qemu.
> > I don't think it's a good idea at all to add dummy values for all the
> > events defined in a platform
> > which is harder to debug for a user.
>
> Yes, you're right that the rest of the events are microarchitectural and that they can't be properly simulated in QEMU at the moment, but it seems to me that's not really the point here. The point is how elastic this solution can be - that is, whether to do any events or not and how exactly they should be counted should be decided by the vendor of a particular machine, and not by the simulator in general. Plus, I have a very real use case where it will come in handy - debugging perf. Support the possibility of simulating events on QEMU side will make the life of t perf folks much easier. I do not insist specifically on my implementation of this solution, but I think that the solution with the creation of a callback for each of the events will significantly complicate the porting of the PMU for machine vendors.
> >

As I mentioned in other threads, I am absolutely okay with single call
backs. So Let's do that. However, I am not in favor of adding
microarchitectural events that we can't support in Qemu with
completely bogus data. Debugging perf in Qemu can be easily done with
the current set of events supported. Those are not the most accurate
but it's a representation of what Qemu is running. Do you foresee any
debugging issue if we don't support all the events a platform
advertises ?

> >
> >>> +} PMUEventFunc;
> >>> +
> >>>  struct CPUArchState {
> >>>      target_ulong gpr[32];
> >>>      target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
> >>> @@ -386,6 +408,9 @@ struct CPUArchState {
> >>>      target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
> >>>
> >>>      PMUFixedCtrState pmu_fixed_ctrs[2];
> >>> +    PMUEventInfo *pmu_events;
> >>> +    PMUEventFunc pmu_efuncs;
> >>> +    int num_pmu_events;
> >>>
> >>>      target_ulong sscratch;
> >>>      target_ulong mscratch;
>
>
diff mbox series

Patch

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 2ac391a7cf74..53426710f73e 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -189,6 +189,28 @@  typedef struct PMUFixedCtrState {
         uint64_t counter_virt_prev[2];
 } PMUFixedCtrState;
 
+typedef uint64_t (*PMU_EVENT_CYCLE_FUNC)(RISCVCPU *);
+typedef uint64_t (*PMU_EVENT_INSTRET_FUNC)(RISCVCPU *);
+typedef uint64_t (*PMU_EVENT_TLB_FUNC)(RISCVCPU *, MMUAccessType access_type);
+
+typedef struct PMUEventInfo {
+    /* Event ID (BIT [0:55] valid) */
+    uint64_t event_id;
+    /* Supported hpmcounters for this event */
+    uint32_t counter_mask;
+    /* Bitmask of valid event bits */
+    uint64_t event_mask;
+} PMUEventInfo;
+
+typedef struct PMUEventFunc {
+    /* Get the ID of the event that can monitor cycles */
+    PMU_EVENT_CYCLE_FUNC get_cycle_id;
+    /* Get the ID of the event that can monitor cycles */
+    PMU_EVENT_INSTRET_FUNC get_intstret_id;
+    /* Get the ID of the event that can monitor TLB events*/
+    PMU_EVENT_TLB_FUNC get_tlb_access_id;
+} PMUEventFunc;
+
 struct CPUArchState {
     target_ulong gpr[32];
     target_ulong gprh[32]; /* 64 top bits of the 128-bit registers */
@@ -386,6 +408,9 @@  struct CPUArchState {
     target_ulong mhpmeventh_val[RV_MAX_MHPMEVENTS];
 
     PMUFixedCtrState pmu_fixed_ctrs[2];
+    PMUEventInfo *pmu_events;
+    PMUEventFunc pmu_efuncs;
+    int num_pmu_events;
 
     target_ulong sscratch;
     target_ulong mscratch;