diff mbox

drm/i915/pmu: Work around compiler warnings on some kernel configs

Message ID 20180314080535.17490-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin March 14, 2018, 8:05 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Arnd Bergman reports:
"""
The conditional spinlock confuses gcc into thinking the 'flags' value
might contain uninitialized data:

drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read':
arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The code is correct, but it's easy to see how the compiler gets confused
here. This avoids the problem by pulling the lock outside of the function
into its only caller.
"""

On deeper look it seems this is caused by paravirt spinlocks
implementation when CONFIG_PARAVIRT_DEBUG is set, which by being
complicated, manages to convince gcc locked parameter can be changed
externally (impossible).

Work around it by removing the conditional locking parameters altogether.
(It was never the most elegant code anyway.)

Slight penalty we now pay is an additional irqsave spin lock/unlock cycle
on the event enable path. But since enable is not a fast path, that is
preferrable to the alternative solution which was doing MMIO under irqsave
spinlock.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout")
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: David Airlie <airlied@linux.ie>
Cc: intel-gfx@lists.freedesktop.org
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

Comments

Chris Wilson March 14, 2018, 11:02 a.m. UTC | #1
Quoting Tvrtko Ursulin (2018-03-14 08:05:35)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Arnd Bergman reports:
> """
> The conditional spinlock confuses gcc into thinking the 'flags' value
> might contain uninitialized data:
> 
> drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read':
> arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized]
> 
> The code is correct, but it's easy to see how the compiler gets confused
> here. This avoids the problem by pulling the lock outside of the function
> into its only caller.
> """
> 
> On deeper look it seems this is caused by paravirt spinlocks
> implementation when CONFIG_PARAVIRT_DEBUG is set, which by being
> complicated, manages to convince gcc locked parameter can be changed
> externally (impossible).
> 
> Work around it by removing the conditional locking parameters altogether.
> (It was never the most elegant code anyway.)
> 
> Slight penalty we now pay is an additional irqsave spin lock/unlock cycle
> on the event enable path. But since enable is not a fast path, that is
> preferrable to the alternative solution which was doing MMIO under irqsave
> spinlock.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Reported-by: Arnd Bergmann <arnd@arndb.de>
> Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout")
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: David Airlie <airlied@linux.ie>
> Cc: intel-gfx@lists.freedesktop.org
> Cc: dri-devel@lists.freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++-------------------
>  1 file changed, 13 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index 4bc7aefa9541..11fb76bd3860 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -412,7 +412,7 @@ static u64 __get_rc6(struct drm_i915_private *i915)
>         return val;
>  }
>  
> -static u64 get_rc6(struct drm_i915_private *i915, bool locked)
> +static u64 get_rc6(struct drm_i915_private *i915)
>  {
>  #if IS_ENABLED(CONFIG_PM)
>         unsigned long flags;
> @@ -428,8 +428,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                  * previously.
>                  */
>  
> -               if (!locked)
> -                       spin_lock_irqsave(&i915->pmu.lock, flags);
> +               spin_lock_irqsave(&i915->pmu.lock, flags);
>  
>                 if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
>                         i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
> @@ -438,12 +437,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                         val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
>                 }
>  
> -               if (!locked)
> -                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +               spin_unlock_irqrestore(&i915->pmu.lock, flags);
>         } else {
>                 struct pci_dev *pdev = i915->drm.pdev;
>                 struct device *kdev = &pdev->dev;
> -               unsigned long flags2;
>  
>                 /*
>                  * We are runtime suspended.
> @@ -452,10 +449,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                  * on top of the last known real value, as the approximated RC6
>                  * counter value.
>                  */
> -               if (!locked)
> -                       spin_lock_irqsave(&i915->pmu.lock, flags);
> -
> -               spin_lock_irqsave(&kdev->power.lock, flags2);
> +               spin_lock_irqsave(&i915->pmu.lock, flags);
> +               spin_lock(&kdev->power.lock);
>  
>                 if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
>                         i915->pmu.suspended_jiffies_last =
> @@ -465,14 +460,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>                       i915->pmu.suspended_jiffies_last;
>                 val += jiffies - kdev->power.accounting_timestamp;
>  
> -               spin_unlock_irqrestore(&kdev->power.lock, flags2);
> +               spin_unlock(&kdev->power.lock);
>  
>                 val = jiffies_to_nsecs(val);
>                 val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
>                 i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
>  
> -               if (!locked)
> -                       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +               spin_unlock_irqrestore(&i915->pmu.lock, flags);
>         }
>  
>         return val;
> @@ -481,7 +475,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked)
>  #endif
>  }
>  
> -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
> +static u64 __i915_pmu_event_read(struct perf_event *event)
>  {
>         struct drm_i915_private *i915 =
>                 container_of(event->pmu, typeof(*i915), pmu.base);
> @@ -519,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
>                         val = count_interrupts(i915);
>                         break;
>                 case I915_PMU_RC6_RESIDENCY:
> -                       val = get_rc6(i915, locked);
> +                       val = get_rc6(i915);
>                         break;
>                 }
>         }
> @@ -534,7 +528,7 @@ static void i915_pmu_event_read(struct perf_event *event)
>  
>  again:
>         prev = local64_read(&hwc->prev_count);
> -       new = __i915_pmu_event_read(event, false);
> +       new = __i915_pmu_event_read(event);
>  
>         if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
>                 goto again;
> @@ -584,14 +578,14 @@ static void i915_pmu_enable(struct perf_event *event)
>                 engine->pmu.enable_count[sample]++;
>         }
>  
> +       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +
>         /*
>          * Store the current counter value so we can report the correct delta
>          * for all listeners. Even when the event was already enabled and has
>          * an existing non-zero value.
>          */
> -       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
> -
> -       spin_unlock_irqrestore(&i915->pmu.lock, flags);
> +       local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
>  }

Ok, the only question then is whether pmu_enable/pmu_disable is
externally serialised. Afaict, that is true through serialisation of
event->state.

Hmm, actually instead of doing local64_set() here, it would be symmetric
with i915_pmu_event_stop() if it was done in i915_pmu_event_start():

i915_pmu_event_start {
	i915_pmu_enable(event);
	/* ... */
	i915_pmu_event_read(event);
	event->hw.state = 0;
}

i915_pmu_event_stop {
	if (flags & UPDATE)
		i915_pmu_event_read(event);
	i915_pmu_disable(event);
	event->hw.state = STOPPED;
}

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Can you follow up with that adjustment for balance, if it suits you?
-Chris
Tvrtko Ursulin March 14, 2018, 5:57 p.m. UTC | #2
On 14/03/2018 08:31, Patchwork wrote:
> == Series Details ==
> 
> Series: drm/i915/pmu: Work around compiler warnings on some kernel configs
> URL   : https://patchwork.freedesktop.org/series/39939/
> State : success
> 
> == Summary ==
> 
> Series 39939v1 drm/i915/pmu: Work around compiler warnings on some kernel configs
> https://patchwork.freedesktop.org/api/1.0/series/39939/revisions/1/mbox/
> 
> ---- Known issues:
> 
> Test gem_mmap_gtt:
>          Subgroup basic-small-bo-tiledx:
>                  fail       -> PASS       (fi-gdg-551) fdo#102575
> Test kms_pipe_crc_basic:
>          Subgroup suspend-read-crc-pipe-b:
>                  pass       -> INCOMPLETE (fi-snb-2520m) fdo#103713
> 
> fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575
> fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713
> 
> fi-bdw-5557u     total:288  pass:267  dwarn:0   dfail:0   fail:0   skip:21  time:431s
> fi-bdw-gvtdvm    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:439s
> fi-blb-e6850     total:288  pass:223  dwarn:1   dfail:0   fail:0   skip:64  time:382s
> fi-bsw-n3050     total:288  pass:242  dwarn:0   dfail:0   fail:0   skip:46  time:534s
> fi-bwr-2160      total:288  pass:183  dwarn:0   dfail:0   fail:0   skip:105 time:301s
> fi-bxt-dsi       total:288  pass:258  dwarn:0   dfail:0   fail:0   skip:30  time:504s
> fi-bxt-j4205     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:513s
> fi-byt-j1900     total:288  pass:253  dwarn:0   dfail:0   fail:0   skip:35  time:507s
> fi-byt-n2820     total:288  pass:249  dwarn:0   dfail:0   fail:0   skip:39  time:498s
> fi-cfl-8700k     total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:410s
> fi-cfl-s2        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:587s
> fi-cfl-u         total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:511s
> fi-cnl-y3        total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:585s
> fi-elk-e7500     total:288  pass:229  dwarn:0   dfail:0   fail:0   skip:59  time:420s
> fi-gdg-551       total:288  pass:180  dwarn:0   dfail:0   fail:0   skip:108 time:316s
> fi-glk-1         total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:530s
> fi-hsw-4770      total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:401s
> fi-ilk-650       total:288  pass:228  dwarn:0   dfail:0   fail:0   skip:60  time:418s
> fi-ivb-3520m     total:288  pass:259  dwarn:0   dfail:0   fail:0   skip:29  time:477s
> fi-ivb-3770      total:288  pass:255  dwarn:0   dfail:0   fail:0   skip:33  time:430s
> fi-kbl-7500u     total:288  pass:263  dwarn:1   dfail:0   fail:0   skip:24  time:474s
> fi-kbl-7567u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:470s
> fi-kbl-r         total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:513s
> fi-skl-6260u     total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:441s
> fi-skl-6600u     total:288  pass:261  dwarn:0   dfail:0   fail:0   skip:27  time:529s
> fi-skl-6700hq    total:288  pass:262  dwarn:0   dfail:0   fail:0   skip:26  time:539s
> fi-skl-6700k2    total:288  pass:264  dwarn:0   dfail:0   fail:0   skip:24  time:512s
> fi-skl-6770hq    total:288  pass:268  dwarn:0   dfail:0   fail:0   skip:20  time:498s
> fi-skl-guc       total:288  pass:260  dwarn:0   dfail:0   fail:0   skip:28  time:428s
> fi-skl-gvtdvm    total:288  pass:265  dwarn:0   dfail:0   fail:0   skip:23  time:437s
> fi-snb-2520m     total:245  pass:211  dwarn:0   dfail:0   fail:0   skip:33
> fi-snb-2600      total:288  pass:248  dwarn:0   dfail:0   fail:0   skip:40  time:398s
> Blacklisted hosts:
> fi-cnl-drrs      total:288  pass:257  dwarn:3   dfail:0   fail:0   skip:28  time:517s
> 
> 613eb885b69e808a46f11125870e47b67a326d76 drm-tip: 2018y-03m-14d-05h-56m-23s UTC integration manifest
> 34462fc0bee3 drm/i915/pmu: Work around compiler warnings on some kernel configs

Pushed it, thanks for the review!

(And I forgot to copy Arnd on the patch..)

So Arnd, sorry, I forgot Reported-by does not add Cc from git 
send-email. https://patchwork.freedesktop.org/series/39939/ is my 
version of the fix for this. (Now merged.) Hopefully it works for your 
randconfigs as well and thanks for sending a patch in the first place!

Regards,

Tvrtko
Arnd Bergmann March 14, 2018, 8:08 p.m. UTC | #3
On Wed, Mar 14, 2018 at 6:57 PM, Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> On 14/03/2018 08:31, Patchwork wrote:

>
> Pushed it, thanks for the review!
>
> (And I forgot to copy Arnd on the patch..)
>
> So Arnd, sorry, I forgot Reported-by does not add Cc from git send-email.
> https://patchwork.freedesktop.org/series/39939/ is my version of the fix for
> this. (Now merged.) Hopefully it works for your randconfigs as well and
> thanks for sending a patch in the first place!

The patch looks good to me, thanks for the follow-up

Acked-by: Arnd Bergmann <arnd@arndb.de>

I didn't test it, but you'll hear from me if it breaks again.

One comment about the use of spin_lock_irqsave(), you wrote:

"Slight penalty we now pay is an additional irqsave spin lock/unlock cycle
on the event enable path. But since enable is not a fast path, that is
preferrable to the alternative solution which was doing MMIO under irqsave
spinlock."

While I don't know about the exact cost on x86, on many architectures
spin_lock_irqsave()/spin_unlock_irqrestore() is much more expensive
than a plain spin_lock()/spin_unlock() or spin_lock_irq()/spin_unlock_irq()
pair that doesn't have to store the disabled-state.

I see you already removed the inner irqsave()/irqrestore() pair that
is now useless (I failed to notice that in my original patch), but in my
experience, it's usually possible to do the same for many others
as well after proving that a function is always called with IRQs enabled
or always disabled.

     Arnd
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4bc7aefa9541..11fb76bd3860 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -412,7 +412,7 @@  static u64 __get_rc6(struct drm_i915_private *i915)
 	return val;
 }
 
-static u64 get_rc6(struct drm_i915_private *i915, bool locked)
+static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
 	unsigned long flags;
@@ -428,8 +428,7 @@  static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 * previously.
 		 */
 
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
 
 		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
 			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
@@ -438,12 +437,10 @@  static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
 		}
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	} else {
 		struct pci_dev *pdev = i915->drm.pdev;
 		struct device *kdev = &pdev->dev;
-		unsigned long flags2;
 
 		/*
 		 * We are runtime suspended.
@@ -452,10 +449,8 @@  static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		 * on top of the last known real value, as the approximated RC6
 		 * counter value.
 		 */
-		if (!locked)
-			spin_lock_irqsave(&i915->pmu.lock, flags);
-
-		spin_lock_irqsave(&kdev->power.lock, flags2);
+		spin_lock_irqsave(&i915->pmu.lock, flags);
+		spin_lock(&kdev->power.lock);
 
 		if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
 			i915->pmu.suspended_jiffies_last =
@@ -465,14 +460,13 @@  static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 		      i915->pmu.suspended_jiffies_last;
 		val += jiffies - kdev->power.accounting_timestamp;
 
-		spin_unlock_irqrestore(&kdev->power.lock, flags2);
+		spin_unlock(&kdev->power.lock);
 
 		val = jiffies_to_nsecs(val);
 		val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
 		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
 
-		if (!locked)
-			spin_unlock_irqrestore(&i915->pmu.lock, flags);
+		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	}
 
 	return val;
@@ -481,7 +475,7 @@  static u64 get_rc6(struct drm_i915_private *i915, bool locked)
 #endif
 }
 
-static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
+static u64 __i915_pmu_event_read(struct perf_event *event)
 {
 	struct drm_i915_private *i915 =
 		container_of(event->pmu, typeof(*i915), pmu.base);
@@ -519,7 +513,7 @@  static u64 __i915_pmu_event_read(struct perf_event *event, bool locked)
 			val = count_interrupts(i915);
 			break;
 		case I915_PMU_RC6_RESIDENCY:
-			val = get_rc6(i915, locked);
+			val = get_rc6(i915);
 			break;
 		}
 	}
@@ -534,7 +528,7 @@  static void i915_pmu_event_read(struct perf_event *event)
 
 again:
 	prev = local64_read(&hwc->prev_count);
-	new = __i915_pmu_event_read(event, false);
+	new = __i915_pmu_event_read(event);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev)
 		goto again;
@@ -584,14 +578,14 @@  static void i915_pmu_enable(struct perf_event *event)
 		engine->pmu.enable_count[sample]++;
 	}
 
+	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
 	/*
 	 * Store the current counter value so we can report the correct delta
 	 * for all listeners. Even when the event was already enabled and has
 	 * an existing non-zero value.
 	 */
-	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true));
-
-	spin_unlock_irqrestore(&i915->pmu.lock, flags);
+	local64_set(&event->hw.prev_count, __i915_pmu_event_read(event));
 }
 
 static void i915_pmu_disable(struct perf_event *event)