diff mbox series

[14/30] drm/i915/pmu: Replace open coded kstat_irqs() copy

Message ID 20201210194043.957046529@linutronix.de (mailing list archive)
State New, archived
Headers show
Series genirq: Treewide hunt for irq descriptor abuse and assorted fixes | expand

Commit Message

Thomas Gleixner Dec. 10, 2020, 7:25 p.m. UTC
Driver code has no business with the internals of the irq descriptor.

Aside of that the count is per interrupt line and therefore takes
interrupts from other devices into account which share the interrupt line
and are not handled by the graphics driver.

Replace it with a pmu private count which only counts interrupts which
originate from the graphics card.

To avoid atomics or heuristics of some sort make the counter field
'unsigned long'. That limits the count to 4e9 on 32bit which is a lot and
postprocessing can easily deal with the occasional wraparound.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/i915/i915_irq.c |   34 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_pmu.c |   18 +-----------------
 drivers/gpu/drm/i915/i915_pmu.h |    8 ++++++++
 3 files changed, 43 insertions(+), 17 deletions(-)

Comments

Jani Nikula Dec. 11, 2020, 9:54 a.m. UTC | #1
On Thu, 10 Dec 2020, Thomas Gleixner <tglx@linutronix.de> wrote:
> Driver code has no business with the internals of the irq descriptor.
>
> Aside of that the count is per interrupt line and therefore takes
> interrupts from other devices into account which share the interrupt line
> and are not handled by the graphics driver.
>
> Replace it with a pmu private count which only counts interrupts which
> originate from the graphics card.
>
> To avoid atomics or heuristics of some sort make the counter field
> 'unsigned long'. That limits the count to 4e9 on 32bit which is a lot and
> postprocessing can easily deal with the occasional wraparound.

I'll let Tvrtko and Chris review the substance here, but assuming they
don't object,

Acked-by: Jani Nikula <jani.nikula@intel.com>

for merging via whichever tree makes most sense.

>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: intel-gfx@lists.freedesktop.org
> Cc: dri-devel@lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_irq.c |   34 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_pmu.c |   18 +-----------------
>  drivers/gpu/drm/i915/i915_pmu.h |    8 ++++++++
>  3 files changed, 43 insertions(+), 17 deletions(-)
>
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -60,6 +60,24 @@
>   * and related files, but that will be described in separate chapters.
>   */
>  
> +/*
> + * Interrupt statistic for PMU. Increments the counter only if the
> + * interrupt originated from the the GPU so interrupts from a device which
> + * shares the interrupt line are not accounted.
> + */
> +static inline void pmu_irq_stats(struct drm_i915_private *priv,
> +				 irqreturn_t res)
> +{
> +	if (unlikely(res != IRQ_HANDLED))
> +		return;
> +
> +	/*
> +	 * A clever compiler translates that into INC. A not so clever one
> +	 * should at least prevent store tearing.
> +	 */
> +	WRITE_ONCE(priv->pmu.irq_count, priv->pmu.irq_count + 1);
> +}
> +
>  typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
>  
>  static const u32 hpd_ilk[HPD_NUM_PINS] = {
> @@ -1599,6 +1617,8 @@ static irqreturn_t valleyview_irq_handle
>  		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
>  	} while (0);
>  
> +	pmu_irq_stats(dev_priv, ret);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>  
>  	return ret;
> @@ -1676,6 +1696,8 @@ static irqreturn_t cherryview_irq_handle
>  		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
>  	} while (0);
>  
> +	pmu_irq_stats(dev_priv, ret);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>  
>  	return ret;
> @@ -2103,6 +2125,8 @@ static irqreturn_t ilk_irq_handler(int i
>  	if (sde_ier)
>  		raw_reg_write(regs, SDEIER, sde_ier);
>  
> +	pmu_irq_stats(i915, ret);
> +
>  	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
>  	enable_rpm_wakeref_asserts(&i915->runtime_pm);
>  
> @@ -2419,6 +2443,8 @@ static irqreturn_t gen8_irq_handler(int
>  
>  	gen8_master_intr_enable(regs);
>  
> +	pmu_irq_stats(dev_priv, IRQ_HANDLED);
> +
>  	return IRQ_HANDLED;
>  }
>  
> @@ -2514,6 +2540,8 @@ static __always_inline irqreturn_t
>  
>  	gen11_gu_misc_irq_handler(gt, gu_misc_iir);
>  
> +	pmu_irq_stats(i915, IRQ_HANDLED);
> +
>  	return IRQ_HANDLED;
>  }
>  
> @@ -3688,6 +3716,8 @@ static irqreturn_t i8xx_irq_handler(int
>  		i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>  	} while (0);
>  
> +	pmu_irq_stats(dev_priv, ret);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>  
>  	return ret;
> @@ -3796,6 +3826,8 @@ static irqreturn_t i915_irq_handler(int
>  		i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>  	} while (0);
>  
> +	pmu_irq_stats(dev_priv, ret);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>  
>  	return ret;
> @@ -3941,6 +3973,8 @@ static irqreturn_t i965_irq_handler(int
>  		i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>  	} while (0);
>  
> +	pmu_irq_stats(dev_priv, IRQ_HANDLED);
> +
>  	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>  
>  	return ret;
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -423,22 +423,6 @@ static enum hrtimer_restart i915_sample(
>  	return HRTIMER_RESTART;
>  }
>  
> -static u64 count_interrupts(struct drm_i915_private *i915)
> -{
> -	/* open-coded kstat_irqs() */
> -	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
> -	u64 sum = 0;
> -	int cpu;
> -
> -	if (!desc || !desc->kstat_irqs)
> -		return 0;
> -
> -	for_each_possible_cpu(cpu)
> -		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
> -
> -	return sum;
> -}
> -
>  static void i915_pmu_event_destroy(struct perf_event *event)
>  {
>  	struct drm_i915_private *i915 =
> @@ -581,7 +565,7 @@ static u64 __i915_pmu_event_read(struct
>  				   USEC_PER_SEC /* to MHz */);
>  			break;
>  		case I915_PMU_INTERRUPTS:
> -			val = count_interrupts(i915);
> +			val = READ_ONCE(pmu->irq_count);
>  			break;
>  		case I915_PMU_RC6_RESIDENCY:
>  			val = get_rc6(&i915->gt);
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -108,6 +108,14 @@ struct i915_pmu {
>  	 */
>  	ktime_t sleep_last;
>  	/**
> +	 * @irq_count: Number of interrupts
> +	 *
> +	 * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
> +	 * 4e9 interrupts are a lot and postprocessing can really deal with an
> +	 * occasional wraparound easily. It's 32bit after all.
> +	 */
> +	unsigned long irq_count;
> +	/**
>  	 * @events_attr_group: Device events attribute group.
>  	 */
>  	struct attribute_group events_attr_group;
>
Tvrtko Ursulin Dec. 11, 2020, 10:13 a.m. UTC | #2
On 10/12/2020 19:25, Thomas Gleixner wrote:
> Driver code has no business with the internals of the irq descriptor.
> 
> Aside of that the count is per interrupt line and therefore takes
> interrupts from other devices into account which share the interrupt line
> and are not handled by the graphics driver.
> 
> Replace it with a pmu private count which only counts interrupts which
> originate from the graphics card.
> 
> To avoid atomics or heuristics of some sort make the counter field
> 'unsigned long'. That limits the count to 4e9 on 32bit which is a lot and
> postprocessing can easily deal with the occasional wraparound.

After my failed hasty sketch from last night I had a different one which 
was kind of heuristics based (re-reading the upper dword and retrying if 
it changed on 32-bit). But you are right - it is okay to at least start 
like this today and if later there is a need we can either do that or 
deal with wrap at PMU read time.

So thanks for dealing with it, some small comments below but overall it 
is fine.

> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: David Airlie <airlied@linux.ie>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: intel-gfx@lists.freedesktop.org
> Cc: dri-devel@lists.freedesktop.org
> ---
>   drivers/gpu/drm/i915/i915_irq.c |   34 ++++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_pmu.c |   18 +-----------------
>   drivers/gpu/drm/i915/i915_pmu.h |    8 ++++++++
>   3 files changed, 43 insertions(+), 17 deletions(-)
> 
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -60,6 +60,24 @@
>    * and related files, but that will be described in separate chapters.
>    */
>   
> +/*
> + * Interrupt statistic for PMU. Increments the counter only if the
> + * interrupt originated from the the GPU so interrupts from a device which
> + * shares the interrupt line are not accounted.
> + */
> +static inline void pmu_irq_stats(struct drm_i915_private *priv,

We never use priv as a local name, it should be either i915 or dev_priv.

> +				 irqreturn_t res)
> +{
> +	if (unlikely(res != IRQ_HANDLED))
> +		return;
> +
> +	/*
> +	 * A clever compiler translates that into INC. A not so clever one
> +	 * should at least prevent store tearing.
> +	 */
> +	WRITE_ONCE(priv->pmu.irq_count, priv->pmu.irq_count + 1);

Curious, probably more educational for me - given x86_32 and x86_64, and 
the context of it getting called, what is the difference from just doing 
irq_count++?

> +}
> +
>   typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
>   
>   static const u32 hpd_ilk[HPD_NUM_PINS] = {
> @@ -1599,6 +1617,8 @@ static irqreturn_t valleyview_irq_handle
>   		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
>   	} while (0);
>   
> +	pmu_irq_stats(dev_priv, ret);
> +
>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>   
>   	return ret;
> @@ -1676,6 +1696,8 @@ static irqreturn_t cherryview_irq_handle
>   		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
>   	} while (0);
>   
> +	pmu_irq_stats(dev_priv, ret);
> +
>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>   
>   	return ret;
> @@ -2103,6 +2125,8 @@ static irqreturn_t ilk_irq_handler(int i
>   	if (sde_ier)
>   		raw_reg_write(regs, SDEIER, sde_ier);
>   
> +	pmu_irq_stats(i915, ret);
> +
>   	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
>   	enable_rpm_wakeref_asserts(&i915->runtime_pm);
>   
> @@ -2419,6 +2443,8 @@ static irqreturn_t gen8_irq_handler(int
>   
>   	gen8_master_intr_enable(regs);
>   
> +	pmu_irq_stats(dev_priv, IRQ_HANDLED);
> +
>   	return IRQ_HANDLED;
>   }
>   
> @@ -2514,6 +2540,8 @@ static __always_inline irqreturn_t
>   
>   	gen11_gu_misc_irq_handler(gt, gu_misc_iir);
>   
> +	pmu_irq_stats(i915, IRQ_HANDLED);
> +
>   	return IRQ_HANDLED;
>   }
>   
> @@ -3688,6 +3716,8 @@ static irqreturn_t i8xx_irq_handler(int
>   		i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>   	} while (0);
>   
> +	pmu_irq_stats(dev_priv, ret);
> +
>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>   
>   	return ret;
> @@ -3796,6 +3826,8 @@ static irqreturn_t i915_irq_handler(int
>   		i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>   	} while (0);
>   
> +	pmu_irq_stats(dev_priv, ret);
> +
>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>   
>   	return ret;
> @@ -3941,6 +3973,8 @@ static irqreturn_t i965_irq_handler(int
>   		i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
>   	} while (0);
>   
> +	pmu_irq_stats(dev_priv, IRQ_HANDLED);
> +
>   	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
>   
>   	return ret;
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -423,22 +423,6 @@ static enum hrtimer_restart i915_sample(
>   	return HRTIMER_RESTART;
>   }

In this file you can also drop the #include <linux/irq.h> line.

>   
> -static u64 count_interrupts(struct drm_i915_private *i915)
> -{
> -	/* open-coded kstat_irqs() */
> -	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
> -	u64 sum = 0;
> -	int cpu;
> -
> -	if (!desc || !desc->kstat_irqs)
> -		return 0;
> -
> -	for_each_possible_cpu(cpu)
> -		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
> -
> -	return sum;
> -}
> -
>   static void i915_pmu_event_destroy(struct perf_event *event)
>   {
>   	struct drm_i915_private *i915 =
> @@ -581,7 +565,7 @@ static u64 __i915_pmu_event_read(struct
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_INTERRUPTS:
> -			val = count_interrupts(i915);
> +			val = READ_ONCE(pmu->irq_count);

I guess same curiosity about READ_ONCE like in the increment site.

>   			break;
>   		case I915_PMU_RC6_RESIDENCY:
>   			val = get_rc6(&i915->gt);
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -108,6 +108,14 @@ struct i915_pmu {
>   	 */
>   	ktime_t sleep_last;
>   	/**
> +	 * @irq_count: Number of interrupts
> +	 *
> +	 * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
> +	 * 4e9 interrupts are a lot and postprocessing can really deal with an
> +	 * occasional wraparound easily. It's 32bit after all.
> +	 */
> +	unsigned long irq_count;
> +	/**
>   	 * @events_attr_group: Device events attribute group.
>   	 */
>   	struct attribute_group events_attr_group;
> 

Regards,

Tvrtko
Thomas Gleixner Dec. 11, 2020, 12:57 p.m. UTC | #3
On Fri, Dec 11 2020 at 10:13, Tvrtko Ursulin wrote:
> On 10/12/2020 19:25, Thomas Gleixner wrote:

>> 
>> Aside of that the count is per interrupt line and therefore takes
>> interrupts from other devices into account which share the interrupt line
>> and are not handled by the graphics driver.
>> 
>> Replace it with a pmu private count which only counts interrupts which
>> originate from the graphics card.
>> 
>> To avoid atomics or heuristics of some sort make the counter field
>> 'unsigned long'. That limits the count to 4e9 on 32bit which is a lot and
>> postprocessing can easily deal with the occasional wraparound.
>
> After my failed hasty sketch from last night I had a different one which 
> was kind of heuristics based (re-reading the upper dword and retrying if 
> it changed on 32-bit).

The problem is that there will be two seperate modifications for the low
and high word. Several ways how the compiler can translate this, but the
problem is the same for all of them:

CPU 0                           CPU 1
        load low
        load high
        add  low, 1
        addc high, 0            
        store low               load high
--> NMI                         load low
                                load high and compare
        store high

You can't catch that. If this really becomes an issue you need a
sequence counter around it.
      

> But you are right - it is okay to at least start 
> like this today and if later there is a need we can either do that or 
> deal with wrap at PMU read time.

Right.

>> +/*
>> + * Interrupt statistic for PMU. Increments the counter only if the
>> + * interrupt originated from the the GPU so interrupts from a device which
>> + * shares the interrupt line are not accounted.
>> + */
>> +static inline void pmu_irq_stats(struct drm_i915_private *priv,
>
> We never use priv as a local name, it should be either i915 or
> dev_priv.

Sure, will fix.

>> +	/*
>> +	 * A clever compiler translates that into INC. A not so clever one
>> +	 * should at least prevent store tearing.
>> +	 */
>> +	WRITE_ONCE(priv->pmu.irq_count, priv->pmu.irq_count + 1);
>
> Curious, probably more educational for me - given x86_32 and x86_64, and 
> the context of it getting called, what is the difference from just doing 
> irq_count++?

Several reasons:

    1) The compiler can pretty much do what it wants with cnt++
       including tearing and whatever. https://lwn.net/Articles/816850/
       for the full set of insanities.

       Not really a problem here, but

    2) It's annotating the reader and the writer side and documenting
       that this is subject to concurrency

    3) It will prevent KCSAN to complain about the data race,
       i.e. concurrent modification while reading.

Thanks,

        tglx

>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>> @@ -423,22 +423,6 @@ static enum hrtimer_restart i915_sample(
>>   	return HRTIMER_RESTART;
>>   }
>
> In this file you can also drop the #include <linux/irq.h> line.

Indeed.

Thanks,

        tglx
David Laight Dec. 11, 2020, 2:19 p.m. UTC | #4
From: Thomas Gleixner
> Sent: 11 December 2020 12:58
..
> > After my failed hasty sketch from last night I had a different one which
> > was kind of heuristics based (re-reading the upper dword and retrying if
> > it changed on 32-bit).
> 
> The problem is that there will be two seperate modifications for the low
> and high word. Several ways how the compiler can translate this, but the
> problem is the same for all of them:
> 
> CPU 0                           CPU 1
>         load low
>         load high
>         add  low, 1
>         addc high, 0
>         store low               load high
> --> NMI                         load low
>                                 load high and compare
>         store high
> 
> You can't catch that. If this really becomes an issue you need a
> sequence counter around it.

Or just two copies of the high word.
Provided the accesses are sequenced:
writer:
	load high:low
	add small_value,high:low
	store high
	store low
	store high_copy
reader:
	load high_copy
	load low
	load high
	if (high != high_copy)
		low = 0;

The read value is always stale, so it probably doesn't
matter that the value you have is one that is between the
value when you started and that when you finished.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Thomas Gleixner Dec. 11, 2020, 9:10 p.m. UTC | #5
On Fri, Dec 11 2020 at 14:19, David Laight wrote:
> From: Thomas Gleixner
>> You can't catch that. If this really becomes an issue you need a
>> sequence counter around it.
>
> Or just two copies of the high word.
> Provided the accesses are sequenced:
> writer:
> 	load high:low
> 	add small_value,high:low
> 	store high
> 	store low
> 	store high_copy
> reader:
> 	load high_copy
> 	load low
> 	load high
> 	if (high != high_copy)
> 		low = 0;

And low = 0 is solving what? You need to loop back and retry until it's
consistent and then it's nothing else than an open coded sequence count.

Thanks,

        tglx
David Laight Dec. 11, 2020, 10:06 p.m. UTC | #6
From: Thomas Gleixner
> Sent: 11 December 2020 21:11
> 
> On Fri, Dec 11 2020 at 14:19, David Laight wrote:
> > From: Thomas Gleixner
> >> You can't catch that. If this really becomes an issue you need a
> >> sequence counter around it.
> >
> > Or just two copies of the high word.
> > Provided the accesses are sequenced:
> > writer:
> > 	load high:low
> > 	add small_value,high:low
> > 	store high
> > 	store low
> > 	store high_copy
> > reader:
> > 	load high_copy
> > 	load low
> > 	load high
> > 	if (high != high_copy)
> > 		low = 0;
> 
> And low = 0 is solving what? You need to loop back and retry until it's
> consistent and then it's nothing else than an open coded sequence count.

If it is a counter or timestamp then the high:0 value happened
some time between when you started trying to read the value and
when you finished trying to read it.

As such it is a perfectly reasonable return value.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -60,6 +60,24 @@ 
  * and related files, but that will be described in separate chapters.
  */
 
+/*
+ * Interrupt statistic for PMU. Increments the counter only if the
+ * interrupt originated from the the GPU so interrupts from a device which
+ * shares the interrupt line are not accounted.
+ */
+static inline void pmu_irq_stats(struct drm_i915_private *priv,
+				 irqreturn_t res)
+{
+	if (unlikely(res != IRQ_HANDLED))
+		return;
+
+	/*
+	 * A clever compiler translates that into INC. A not so clever one
+	 * should at least prevent store tearing.
+	 */
+	WRITE_ONCE(priv->pmu.irq_count, priv->pmu.irq_count + 1);
+}
+
 typedef bool (*long_pulse_detect_func)(enum hpd_pin pin, u32 val);
 
 static const u32 hpd_ilk[HPD_NUM_PINS] = {
@@ -1599,6 +1617,8 @@  static irqreturn_t valleyview_irq_handle
 		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
 	} while (0);
 
+	pmu_irq_stats(dev_priv, ret);
+
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
@@ -1676,6 +1696,8 @@  static irqreturn_t cherryview_irq_handle
 		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
 	} while (0);
 
+	pmu_irq_stats(dev_priv, ret);
+
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
@@ -2103,6 +2125,8 @@  static irqreturn_t ilk_irq_handler(int i
 	if (sde_ier)
 		raw_reg_write(regs, SDEIER, sde_ier);
 
+	pmu_irq_stats(i915, ret);
+
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	enable_rpm_wakeref_asserts(&i915->runtime_pm);
 
@@ -2419,6 +2443,8 @@  static irqreturn_t gen8_irq_handler(int
 
 	gen8_master_intr_enable(regs);
 
+	pmu_irq_stats(dev_priv, IRQ_HANDLED);
+
 	return IRQ_HANDLED;
 }
 
@@ -2514,6 +2540,8 @@  static __always_inline irqreturn_t
 
 	gen11_gu_misc_irq_handler(gt, gu_misc_iir);
 
+	pmu_irq_stats(i915, IRQ_HANDLED);
+
 	return IRQ_HANDLED;
 }
 
@@ -3688,6 +3716,8 @@  static irqreturn_t i8xx_irq_handler(int
 		i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
+	pmu_irq_stats(dev_priv, ret);
+
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
@@ -3796,6 +3826,8 @@  static irqreturn_t i915_irq_handler(int
 		i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
+	pmu_irq_stats(dev_priv, ret);
+
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
@@ -3941,6 +3973,8 @@  static irqreturn_t i965_irq_handler(int
 		i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
+	pmu_irq_stats(dev_priv, IRQ_HANDLED);
+
 	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -423,22 +423,6 @@  static enum hrtimer_restart i915_sample(
 	return HRTIMER_RESTART;
 }
 
-static u64 count_interrupts(struct drm_i915_private *i915)
-{
-	/* open-coded kstat_irqs() */
-	struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
-	u64 sum = 0;
-	int cpu;
-
-	if (!desc || !desc->kstat_irqs)
-		return 0;
-
-	for_each_possible_cpu(cpu)
-		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
-
-	return sum;
-}
-
 static void i915_pmu_event_destroy(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
@@ -581,7 +565,7 @@  static u64 __i915_pmu_event_read(struct
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
-			val = count_interrupts(i915);
+			val = READ_ONCE(pmu->irq_count);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
 			val = get_rc6(&i915->gt);
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -108,6 +108,14 @@  struct i915_pmu {
 	 */
 	ktime_t sleep_last;
 	/**
+	 * @irq_count: Number of interrupts
+	 *
+	 * Intentionally unsigned long to avoid atomics or heuristics on 32bit.
+	 * 4e9 interrupts are a lot and postprocessing can really deal with an
+	 * occasional wraparound easily. It's 32bit after all.
+	 */
+	unsigned long irq_count;
+	/**
 	 * @events_attr_group: Device events attribute group.
 	 */
 	struct attribute_group events_attr_group;