Message ID | 20180314080535.17490-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Tvrtko Ursulin (2018-03-14 08:05:35) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Arnd Bergman reports: > """ > The conditional spinlock confuses gcc into thinking the 'flags' value > might contain uninitialized data: > > drivers/gpu/drm/i915/i915_pmu.c: In function '__i915_pmu_event_read': > arch/x86/include/asm/paravirt_types.h:573:3: error: 'flags' may be used uninitialized in this function [-Werror=maybe-uninitialized] > > The code is correct, but it's easy to see how the compiler gets confused > here. This avoids the problem by pulling the lock outside of the function > into its only caller. > """ > > On deeper look it seems this is caused by paravirt spinlocks > implementation when CONFIG_PARAVIRT_DEBUG is set, which by being > complicated, manages to convince gcc locked parameter can be changed > externally (impossible). > > Work around it by removing the conditional locking parameters altogether. > (It was never the most elegant code anyway.) > > Slight penalty we now pay is an additional irqsave spin lock/unlock cycle > on the event enable path. But since enable is not a fast path, that is > preferrable to the alternative solution which was doing MMIO under irqsave > spinlock. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Reported-by: Arnd Bergmann <arnd@arndb.de> > Fixes: 1fe699e30113 ("drm/i915/pmu: Fix sleep under atomic in RC6 readout") > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Imre Deak <imre.deak@intel.com> > Cc: Jani Nikula <jani.nikula@linux.intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: David Airlie <airlied@linux.ie> > Cc: intel-gfx@lists.freedesktop.org > Cc: dri-devel@lists.freedesktop.org > --- > drivers/gpu/drm/i915/i915_pmu.c | 32 +++++++++++++------------------- > 1 file changed, 13 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > index 4bc7aefa9541..11fb76bd3860 100644 > --- a/drivers/gpu/drm/i915/i915_pmu.c > +++ b/drivers/gpu/drm/i915/i915_pmu.c > @@ -412,7 +412,7 @@ static u64 __get_rc6(struct drm_i915_private *i915) > return val; > } > > -static u64 get_rc6(struct drm_i915_private *i915, bool locked) > +static u64 get_rc6(struct drm_i915_private *i915) > { > #if IS_ENABLED(CONFIG_PM) > unsigned long flags; > @@ -428,8 +428,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) > * previously. > */ > > - if (!locked) > - spin_lock_irqsave(&i915->pmu.lock, flags); > + spin_lock_irqsave(&i915->pmu.lock, flags); > > if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { > i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; > @@ -438,12 +437,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) > val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; > } > > - if (!locked) > - spin_unlock_irqrestore(&i915->pmu.lock, flags); > + spin_unlock_irqrestore(&i915->pmu.lock, flags); > } else { > struct pci_dev *pdev = i915->drm.pdev; > struct device *kdev = &pdev->dev; > - unsigned long flags2; > > /* > * We are runtime suspended. > @@ -452,10 +449,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) > * on top of the last known real value, as the approximated RC6 > * counter value. > */ > - if (!locked) > - spin_lock_irqsave(&i915->pmu.lock, flags); > - > - spin_lock_irqsave(&kdev->power.lock, flags2); > + spin_lock_irqsave(&i915->pmu.lock, flags); > + spin_lock(&kdev->power.lock); > > if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) > i915->pmu.suspended_jiffies_last = > @@ -465,14 +460,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) > i915->pmu.suspended_jiffies_last; > val += jiffies - kdev->power.accounting_timestamp; > > - spin_unlock_irqrestore(&kdev->power.lock, flags2); > + spin_unlock(&kdev->power.lock); > > val = jiffies_to_nsecs(val); > val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; > i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; > > - if (!locked) > - spin_unlock_irqrestore(&i915->pmu.lock, flags); > + spin_unlock_irqrestore(&i915->pmu.lock, flags); > } > > return val; > @@ -481,7 +475,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) > #endif > } > > -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) > +static u64 __i915_pmu_event_read(struct perf_event *event) > { > struct drm_i915_private *i915 = > container_of(event->pmu, typeof(*i915), pmu.base); > @@ -519,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) > val = count_interrupts(i915); > break; > case I915_PMU_RC6_RESIDENCY: > - val = get_rc6(i915, locked); > + val = get_rc6(i915); > break; > } > } > @@ -534,7 +528,7 @@ static void i915_pmu_event_read(struct perf_event *event) > > again: > prev = local64_read(&hwc->prev_count); > - new = __i915_pmu_event_read(event, false); > + new = __i915_pmu_event_read(event); > > if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) > goto again; > @@ -584,14 +578,14 @@ static void i915_pmu_enable(struct perf_event *event) > engine->pmu.enable_count[sample]++; > } > > + spin_unlock_irqrestore(&i915->pmu.lock, flags); > + > /* > * Store the current counter value so we can report the correct delta > * for all listeners. Even when the event was already enabled and has > * an existing non-zero value. > */ > - local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true)); > - > - spin_unlock_irqrestore(&i915->pmu.lock, flags); > + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); > } Ok, the only question then is whether pmu_enable/pmu_disable is externally serialised. Afaict, that is true through serialisation of event->state. Hmm, actually instead of doing local64_set() here, it would be symmetric with i915_pmu_event_stop() if it was done in i915_pmu_event_start(): i915_pmu_event_start { i915_pmu_enable(event); /* ... */ i915_pmu_event_read(event); event->hw.state = 0; } i915_pmu_event_stop { if (flags & UPDATE) i915_pmu_event_read(event); i915_pmu_disable(event); event->hw.state = STOPPED; } Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Can you follow up with that adjustment for balance, if it suits you? -Chris
On 14/03/2018 08:31, Patchwork wrote: > == Series Details == > > Series: drm/i915/pmu: Work around compiler warnings on some kernel configs > URL : https://patchwork.freedesktop.org/series/39939/ > State : success > > == Summary == > > Series 39939v1 drm/i915/pmu: Work around compiler warnings on some kernel configs > https://patchwork.freedesktop.org/api/1.0/series/39939/revisions/1/mbox/ > > ---- Known issues: > > Test gem_mmap_gtt: > Subgroup basic-small-bo-tiledx: > fail -> PASS (fi-gdg-551) fdo#102575 > Test kms_pipe_crc_basic: > Subgroup suspend-read-crc-pipe-b: > pass -> INCOMPLETE (fi-snb-2520m) fdo#103713 > > fdo#102575 https://bugs.freedesktop.org/show_bug.cgi?id=102575 > fdo#103713 https://bugs.freedesktop.org/show_bug.cgi?id=103713 > > fi-bdw-5557u total:288 pass:267 dwarn:0 dfail:0 fail:0 skip:21 time:431s > fi-bdw-gvtdvm total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:439s > fi-blb-e6850 total:288 pass:223 dwarn:1 dfail:0 fail:0 skip:64 time:382s > fi-bsw-n3050 total:288 pass:242 dwarn:0 dfail:0 fail:0 skip:46 time:534s > fi-bwr-2160 total:288 pass:183 dwarn:0 dfail:0 fail:0 skip:105 time:301s > fi-bxt-dsi total:288 pass:258 dwarn:0 dfail:0 fail:0 skip:30 time:504s > fi-bxt-j4205 total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:513s > fi-byt-j1900 total:288 pass:253 dwarn:0 dfail:0 fail:0 skip:35 time:507s > fi-byt-n2820 total:288 pass:249 dwarn:0 dfail:0 fail:0 skip:39 time:498s > fi-cfl-8700k total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:410s > fi-cfl-s2 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:587s > fi-cfl-u total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:511s > fi-cnl-y3 total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:585s > fi-elk-e7500 total:288 pass:229 dwarn:0 dfail:0 fail:0 skip:59 time:420s > fi-gdg-551 total:288 pass:180 dwarn:0 dfail:0 fail:0 skip:108 time:316s > fi-glk-1 total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:530s > fi-hsw-4770 total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:401s > fi-ilk-650 total:288 pass:228 dwarn:0 dfail:0 fail:0 skip:60 time:418s > fi-ivb-3520m total:288 pass:259 dwarn:0 dfail:0 fail:0 skip:29 time:477s > fi-ivb-3770 total:288 pass:255 dwarn:0 dfail:0 fail:0 skip:33 time:430s > fi-kbl-7500u total:288 pass:263 dwarn:1 dfail:0 fail:0 skip:24 time:474s > fi-kbl-7567u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:470s > fi-kbl-r total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:513s > fi-skl-6260u total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:441s > fi-skl-6600u total:288 pass:261 dwarn:0 dfail:0 fail:0 skip:27 time:529s > fi-skl-6700hq total:288 pass:262 dwarn:0 dfail:0 fail:0 skip:26 time:539s > fi-skl-6700k2 total:288 pass:264 dwarn:0 dfail:0 fail:0 skip:24 time:512s > fi-skl-6770hq total:288 pass:268 dwarn:0 dfail:0 fail:0 skip:20 time:498s > fi-skl-guc total:288 pass:260 dwarn:0 dfail:0 fail:0 skip:28 time:428s > fi-skl-gvtdvm total:288 pass:265 dwarn:0 dfail:0 fail:0 skip:23 time:437s > fi-snb-2520m total:245 pass:211 dwarn:0 dfail:0 fail:0 skip:33 > fi-snb-2600 total:288 pass:248 dwarn:0 dfail:0 fail:0 skip:40 time:398s > Blacklisted hosts: > fi-cnl-drrs total:288 pass:257 dwarn:3 dfail:0 fail:0 skip:28 time:517s > > 613eb885b69e808a46f11125870e47b67a326d76 drm-tip: 2018y-03m-14d-05h-56m-23s UTC integration manifest > 34462fc0bee3 drm/i915/pmu: Work around compiler warnings on some kernel configs Pushed it, thanks for the review! (And I forgot to copy Arnd on the patch..) So Arnd, sorry, I forgot Reported-by does not add Cc from git send-email. https://patchwork.freedesktop.org/series/39939/ is my version of the fix for this. (Now merged.) Hopefully it works for your randconfigs as well and thanks for sending a patch in the first place! Regards, Tvrtko
On Wed, Mar 14, 2018 at 6:57 PM, Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote: > On 14/03/2018 08:31, Patchwork wrote: > > Pushed it, thanks for the review! > > (And I forgot to copy Arnd on the patch..) > > So Arnd, sorry, I forgot Reported-by does not add Cc from git send-email. > https://patchwork.freedesktop.org/series/39939/ is my version of the fix for > this. (Now merged.) Hopefully it works for your randconfigs as well and > thanks for sending a patch in the first place! The patch looks good to me, thanks for the follow-up Acked-by: Arnd Bergmann <arnd@arndb.de> I didn't test it, but you'll hear from me if it breaks again. One comment about the use of spin_lock_irqsave(), you wrote: "Slight penalty we now pay is an additional irqsave spin lock/unlock cycle on the event enable path. But since enable is not a fast path, that is preferrable to the alternative solution which was doing MMIO under irqsave spinlock." While I don't know about the exact cost on x86, on many architectures spin_lock_irqsave()/spin_unlock_irqrestore() is much more expensive than a plain spin_lock()/spin_unlock() or spin_lock_irq()/spin_unlock_irq() pair that doesn't have to store the disabled-state. I see you already removed the inner irqsave()/irqrestore() pair that is now useless (I failed to notice that in my original patch), but in my experience, it's usually possible to do the same for many others as well after proving that a function is always called with IRQs enabled or always disabled. Arnd
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 4bc7aefa9541..11fb76bd3860 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -412,7 +412,7 @@ static u64 __get_rc6(struct drm_i915_private *i915) return val; } -static u64 get_rc6(struct drm_i915_private *i915, bool locked) +static u64 get_rc6(struct drm_i915_private *i915) { #if IS_ENABLED(CONFIG_PM) unsigned long flags; @@ -428,8 +428,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * previously. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock_irqsave(&i915->pmu.lock, flags); if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; @@ -438,12 +437,10 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; } - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } else { struct pci_dev *pdev = i915->drm.pdev; struct device *kdev = &pdev->dev; - unsigned long flags2; /* * We are runtime suspended. @@ -452,10 +449,8 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) * on top of the last known real value, as the approximated RC6 * counter value. */ - if (!locked) - spin_lock_irqsave(&i915->pmu.lock, flags); - - spin_lock_irqsave(&kdev->power.lock, flags2); + spin_lock_irqsave(&i915->pmu.lock, flags); + spin_lock(&kdev->power.lock); if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) i915->pmu.suspended_jiffies_last = @@ -465,14 +460,13 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) i915->pmu.suspended_jiffies_last; val += jiffies - kdev->power.accounting_timestamp; - spin_unlock_irqrestore(&kdev->power.lock, flags2); + spin_unlock(&kdev->power.lock); val = jiffies_to_nsecs(val); val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - if (!locked) - spin_unlock_irqrestore(&i915->pmu.lock, flags); + spin_unlock_irqrestore(&i915->pmu.lock, flags); } return val; @@ -481,7 +475,7 @@ static u64 get_rc6(struct drm_i915_private *i915, bool locked) #endif } -static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) +static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); @@ -519,7 +513,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event, bool locked) val = count_interrupts(i915); break; case I915_PMU_RC6_RESIDENCY: - val = get_rc6(i915, locked); + val = get_rc6(i915); break; } } @@ -534,7 +528,7 @@ static void i915_pmu_event_read(struct perf_event *event) again: prev = local64_read(&hwc->prev_count); - new = __i915_pmu_event_read(event, false); + new = __i915_pmu_event_read(event); if (local64_cmpxchg(&hwc->prev_count, prev, new) != prev) goto again; @@ -584,14 +578,14 @@ static void i915_pmu_enable(struct perf_event *event) engine->pmu.enable_count[sample]++; } + spin_unlock_irqrestore(&i915->pmu.lock, flags); + /* * Store the current counter value so we can report the correct delta * for all listeners. Even when the event was already enabled and has * an existing non-zero value. */ - local64_set(&event->hw.prev_count, __i915_pmu_event_read(event, true)); - - spin_unlock_irqrestore(&i915->pmu.lock, flags); + local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); } static void i915_pmu_disable(struct perf_event *event)