diff mbox series

[2/3] perf/amlogic: Fix large number of counter issue

Message ID 20230209115403.521868-2-jiucheng.xu@amlogic.com (mailing list archive)
State New, archived
Headers show
Series [1/3] perf/amlogic: Fix config1/config2 parsing issue | expand

Commit Message

Jiucheng Xu Feb. 9, 2023, 11:54 a.m. UTC
When use 1ms interval, very large number of counter happens
once in a while as below:

25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/

Root cause is the race between irq handler
and pmu.read callback. Use spin lock to protect the sw&hw
counters.

Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
---
 drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

Comments

Will Deacon March 27, 2023, 2:10 p.m. UTC | #1
On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> When use 1ms interval, very large number of counter happens
> once in a while as below:
> 
> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> 
> Root cause is the race between irq handler
> and pmu.read callback. Use spin lock to protect the sw&hw
> counters.
> 
> Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
> ---
>  drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> index 0b24dee1ed3c..9b2e5d5c0626 100644
> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> @@ -14,6 +14,7 @@
>  #include <linux/perf_event.h>
>  #include <linux/platform_device.h>
>  #include <linux/printk.h>
> +#include <linux/spinlock.h>
>  #include <linux/sysfs.h>
>  #include <linux/types.h>
>  
> @@ -23,6 +24,7 @@ struct ddr_pmu {
>  	struct pmu pmu;
>  	struct dmc_info info;
>  	struct dmc_counter counters;	/* save counters from hw */
> +	spinlock_t lock;		/* protect hw/sw counter */
>  	bool pmu_enabled;
>  	struct device *dev;
>  	char *name;
> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
>  	int idx;
>  	int chann_nr = pmu->info.hw_info->chann_nr;
>  
> +	spin_lock(&pmu->lock);

Why doesn't this need the _irqsave() variant if we're racing with the irq
handler?

Will
Jiucheng Xu March 28, 2023, 2:29 a.m. UTC | #2
My Amlogic email box has some issues. Use my personal email 
<jiucheng.xu@163.com> to reply.

On 2023/3/27 22:10, Will Deacon wrote:
> [ EXTERNAL EMAIL ]
>
> On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
>> When use 1ms interval, very large number of counter happens
>> once in a while as below:
>>
>> 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>> 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
>>
>> Root cause is the race between irq handler
>> and pmu.read callback. Use spin lock to protect the sw&hw
>> counters.
>>
>> Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
>> ---
>>   drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
>>   1 file changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> index 0b24dee1ed3c..9b2e5d5c0626 100644
>> --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
>> @@ -14,6 +14,7 @@
>>   #include <linux/perf_event.h>
>>   #include <linux/platform_device.h>
>>   #include <linux/printk.h>
>> +#include <linux/spinlock.h>
>>   #include <linux/sysfs.h>
>>   #include <linux/types.h>
>>   
>> @@ -23,6 +24,7 @@ struct ddr_pmu {
>>   	struct pmu pmu;
>>   	struct dmc_info info;
>>   	struct dmc_counter counters;	/* save counters from hw */
>> +	spinlock_t lock;		/* protect hw/sw counter */
>>   	bool pmu_enabled;
>>   	struct device *dev;
>>   	char *name;
>> @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
>>   	int idx;
>>   	int chann_nr = pmu->info.hw_info->chann_nr;
>>   
>> +	spin_lock(&pmu->lock);
> Why doesn't this need the _irqsave() variant if we're racing with the irq
> handler?
>
> Will
I think meson_ddr_perf_event_update function is called with hard irq off.
So update function couldn't be interrupted by irq handler. Right?

Thanks,
Jiucheng
>
Will Deacon March 28, 2023, 11:55 a.m. UTC | #3
On Tue, Mar 28, 2023 at 10:29:04AM +0800, Jiucheng Xu wrote:
> 
> My Amlogic email box has some issues. Use my personal email
> <jiucheng.xu@163.com> to reply.
> 
> On 2023/3/27 22:10, Will Deacon wrote:
> > [ EXTERNAL EMAIL ]
> > 
> > On Thu, Feb 09, 2023 at 07:54:02PM +0800, Jiucheng Xu wrote:
> > > When use 1ms interval, very large number of counter happens
> > > once in a while as below:
> > > 
> > > 25.968654513 281474976710655.84 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.118657346 281474976710655.88 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 26.180137180 281474976710655.66 MB meson_ddr_bw/chan_1_rw_bytes,arm=1/
> > > 
> > > Root cause is the race between irq handler
> > > and pmu.read callback. Use spin lock to protect the sw&hw
> > > counters.
> > > 
> > > Signed-off-by: Jiucheng Xu <jiucheng.xu@amlogic.com>
> > > ---
> > >   drivers/perf/amlogic/meson_ddr_pmu_core.c | 10 +++++++++-
> > >   1 file changed, 9 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > index 0b24dee1ed3c..9b2e5d5c0626 100644
> > > --- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > +++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
> > > @@ -14,6 +14,7 @@
> > >   #include <linux/perf_event.h>
> > >   #include <linux/platform_device.h>
> > >   #include <linux/printk.h>
> > > +#include <linux/spinlock.h>
> > >   #include <linux/sysfs.h>
> > >   #include <linux/types.h>
> > > @@ -23,6 +24,7 @@ struct ddr_pmu {
> > >   	struct pmu pmu;
> > >   	struct dmc_info info;
> > >   	struct dmc_counter counters;	/* save counters from hw */
> > > +	spinlock_t lock;		/* protect hw/sw counter */
> > >   	bool pmu_enabled;
> > >   	struct device *dev;
> > >   	char *name;
> > > @@ -92,10 +94,12 @@ static void meson_ddr_perf_event_update(struct perf_event *event)
> > >   	int idx;
> > >   	int chann_nr = pmu->info.hw_info->chann_nr;
> > > +	spin_lock(&pmu->lock);
> > Why doesn't this need the _irqsave() variant if we're racing with the irq
> > handler?
> > 
> > Will
> I think meson_ddr_perf_event_update function is called with hard irq off.
> So update function couldn't be interrupted by irq handler. Right?

I'm just confused about the race, then. The commit message says you have a
race between an irq handler and a callback, which you fix with a spinlock
that isn't irq safe. So either the race is real and the lock needs to be
irqsafe, or the race is something else entirely, no?

Will
diff mbox series

Patch

diff --git a/drivers/perf/amlogic/meson_ddr_pmu_core.c b/drivers/perf/amlogic/meson_ddr_pmu_core.c
index 0b24dee1ed3c..9b2e5d5c0626 100644
--- a/drivers/perf/amlogic/meson_ddr_pmu_core.c
+++ b/drivers/perf/amlogic/meson_ddr_pmu_core.c
@@ -14,6 +14,7 @@ 
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/spinlock.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 
@@ -23,6 +24,7 @@  struct ddr_pmu {
 	struct pmu pmu;
 	struct dmc_info info;
 	struct dmc_counter counters;	/* save counters from hw */
+	spinlock_t lock;		/* protect hw/sw counter */
 	bool pmu_enabled;
 	struct device *dev;
 	char *name;
@@ -92,10 +94,12 @@  static void meson_ddr_perf_event_update(struct perf_event *event)
 	int idx;
 	int chann_nr = pmu->info.hw_info->chann_nr;
 
+	spin_lock(&pmu->lock);
 	/* get the remain counters in register. */
 	pmu->info.hw_info->get_counters(&pmu->info, &dc);
 
 	ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
+	spin_unlock(&pmu->lock);
 
 	switch (event->attr.config) {
 	case ALL_CHAN_COUNTER_ID:
@@ -355,6 +359,7 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 
 	pmu = dmc_info_to_pmu(info);
 
+	spin_lock(&pmu->lock);
 	if (info->hw_info->irq_handler(info, &counters) != 0)
 		goto out;
 
@@ -372,6 +377,8 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 		 * it in ISR to support continue mode.
 		 */
 		info->hw_info->enable(info);
+out:
+	spin_unlock(&pmu->lock);
 
 	dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
 			"sum: %llu %llu %llu, %llu, %llu, %llu\n",
@@ -388,7 +395,7 @@  static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
 			pmu->counters.channel_cnt[1],
 			pmu->counters.channel_cnt[2],
 			pmu->counters.channel_cnt[3]);
-out:
+
 	return IRQ_HANDLED;
 }
 
@@ -539,6 +546,7 @@  int meson_ddr_pmu_create(struct platform_device *pdev)
 	pmu->name = name;
 	pmu->dev = &pdev->dev;
 	pmu->pmu_enabled = false;
+	spin_lock_init(&pmu->lock);
 
 	platform_set_drvdata(pdev, pmu);