Message ID | 1459788671-17501-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Current implementation releases the forcewake at any time between > straight away, and one jiffie from the last put, or first automatic > grab. That isn't the problem though. The problem is that we set the timer on first use rather than last use. All you are stating here is that by lengthening the timeout on your system you reduce the number of times it times out. Having said that, the conversion to hrtimer seems sensible but to add tracking of the last access as opposed to first we either fallback to jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being fast enough for every register write. Hmm, my usual response to that has been if it matters we avoid the heavyweight macros and use the _FW interface - or even raw readl/writel. Could you try storing ktime_get_raw() on every access and rearming the timer if it expires before last-access + min-period? -Chris
On 04/04/16 17:51, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Current implementation releases the forcewake at any time between > straight away, and one jiffie from the last put, or first automatic > grab. > > This does not sound like what was desired since jiffies are typically > between 1 and 10ms depending on the kernel configuration. > > Change the auto-release mechanism to use hrtimers and set the timeout > to 1ms with a 1ms of slack. This should make the GPU power consistent > across kernel configs and the slack should enable some timer coallescing "coalescing" > where multiple force-wake domains exist, or with unrelated timers. > > For GlBench/T-Rex this decreases the number of forcewake releases from > ~480 to ~300 per second, and for a heavy combined OGL/OCL test from > ~670 to ~360. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> LGTM. Reviewed-by: Dave Gordon <david.s.gordon@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 +- > drivers/gpu/drm/i915/intel_uncore.c | 25 ++++++++++++++++--------- > 2 files changed, 17 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index dd187727c813..7d4c704d7d75 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -666,7 +666,7 @@ struct intel_uncore { > struct drm_i915_private *i915; > enum forcewake_domain_id id; > unsigned wake_count; > - struct timer_list timer; > + struct hrtimer timer; > i915_reg_t reg_set; > u32 val_set; > u32 val_clear; > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index ac1c545436af..76ac990de354 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -60,7 +60,11 @@ fw_domain_reset(const struct intel_uncore_forcewake_domain *d) > static inline void > fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) > { > - mod_timer_pinned(&d->timer, 
jiffies + 1); > + d->wake_count++; > + hrtimer_start_range_ns(&d->timer, > + ktime_set(0, NSEC_PER_MSEC), > + NSEC_PER_MSEC, > + HRTIMER_MODE_REL); > } > > static inline void > @@ -224,9 +228,11 @@ static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) > return ret; > } > > -static void intel_uncore_fw_release_timer(unsigned long arg) > +static enum hrtimer_restart > +intel_uncore_fw_release_timer(struct hrtimer *timer) > { > - struct intel_uncore_forcewake_domain *domain = (void *)arg; > + struct intel_uncore_forcewake_domain *domain = > + container_of(timer, struct intel_uncore_forcewake_domain, timer); > unsigned long irqflags; > > assert_rpm_device_not_suspended(domain->i915); > @@ -240,6 +246,8 @@ static void intel_uncore_fw_release_timer(unsigned long arg) > 1 << domain->id); > > spin_unlock_irqrestore(&domain->i915->uncore.lock, irqflags); > + > + return HRTIMER_NORESTART; > } > > void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore) > @@ -259,16 +267,16 @@ void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore) > active_domains = 0; > > for_each_fw_domain(domain, dev_priv, id) { > - if (del_timer_sync(&domain->timer) == 0) > + if (hrtimer_cancel(&domain->timer) == 0) > continue; > > - intel_uncore_fw_release_timer((unsigned long)domain); > + intel_uncore_fw_release_timer(&domain->timer); > } > > spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); > > for_each_fw_domain(domain, dev_priv, id) { > - if (timer_pending(&domain->timer)) > + if (hrtimer_active(&domain->timer)) > active_domains |= (1 << id); > } > > @@ -491,7 +499,6 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, > if (--domain->wake_count) > continue; > > - domain->wake_count++; > fw_domain_arm_timer(domain); > } > } > @@ -732,7 +739,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, > continue; > } > > - domain->wake_count++; > fw_domain_arm_timer(domain); > } > > @@ -1150,7 +1156,8 @@ 
static void fw_domain_init(struct drm_i915_private *dev_priv, > d->i915 = dev_priv; > d->id = domain_id; > > - setup_timer(&d->timer, intel_uncore_fw_release_timer, (unsigned long)d); > + hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > + d->timer.function = intel_uncore_fw_release_timer; > > dev_priv->uncore.fw_domains |= (1 << domain_id); > >
On 04/04/16 19:58, Chris Wilson wrote: > On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Current implementation releases the forcewake at any time between >> straight away, and one jiffie from the last put, or first automatic >> grab. > > That isn't the problem though. The problem is that we set the timer on > first use rather than last use. All you are stating here is that by > lengthening the timeout on your system you reduce the number of times it > times out. I thought that was already done, per my comments on one of your recent patches (rename __force_wake_get to __force_wake_auto) and your reply (gains are probably worth extra complexity). But of course Tvrtko wasn't here the last few days so may not have seen that. I suppose it would make sense to fold both changes together. But changing the timeout to 1ms makes it generally *shorter* than before, not longer. The gain can't just be from having a longer timeout, 'cos it isn't. (Average timeout now 1.5ms vs. previous 8ms?) I think it should come from the fact that accesses are likely to be bursty, having a bimodal distribution in the time domain. When we've made one access, we're likely to make another much sooner than 1ms later, but once we stop making accesses there will probably be far more than 1ms before the next set of accesses. > Having said that, the conversion to hrtimer seems sensible but to add > tracking of the last access as opposed to first we either fallback to > jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being > fast enough for every register write. Hmm, my usual response to that has > been if it matters we avoid the heavyweight macros and use the _FW > interface - or even raw readl/writel. > > Could you try storing ktime_get_raw() on every access and rearming the > timer if it expires before last-access + min-period? 
> -Chris One more idea - do we want two different expiry periods depending on whether the caller used the _auto version or not? If they did, they're probably anticipating "only a few" accesses in the current operation, but perhaps with no idea about when the next set of accesses will occur. If not, then the final decrement means they think they've finished for a while, and perhaps don't expect to come back for quite a long time. In that situation the timeout only helps avoid rapid off/on cycles by unrelated functions, not between logically-related operations. So would two different timeouts make sense? Or even ... _auto: longish timeout from the *first* _get() - don't rearm thereafter other: shortish timeout from the _put() - non-auto _get() will cancel or is that just too complicated? .Dave.
On Mon, Apr 04, 2016 at 08:41:20PM +0100, Dave Gordon wrote: > On 04/04/16 19:58, Chris Wilson wrote: > >On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >>Current implementation releases the forcewake at any time between > >>straight away, and one jiffie from the last put, or first automatic > >>grab. > > > >That isn't the problem though. The problem is that we set the timer on > >first use rather than last use. All you are stating here is that by > >lengthening the timeout on your system you reduce the number of times it > >times out. > > I thought that was already done, per my comments on one of your > recent patches (rename __force_wake_get to __force_wake_auto) and > your reply (gains are probably worth extra complexity). But of > course Tvrtko wasn't here the last few days so may not have seen > that. I suppose it would make sense to fold both changes together. We were talking about the cost of having 2 passes vs 1 to do the same task. Before Tvrtko left, I realised again that we didn't defer the timer itself for every access, rearming a timer on every register access seems costly (more so switching to hrtimer). Rearming the timer inside the timer callback seems a reasonable compromise. > But changing the timeout to 1ms makes it generally *shorter* than > before, not longer. The gain can't just be from having a longer > timeout, 'cos it isn't. (Average timeout now 1.5ms vs. previous > 8ms?) It is. jiffies+1 does not equal an 8ms delay (it would be 1ms on his machine anways, 10ms on mine). The issue is that +1 jiffie is up to 1 jiffie and often close to 0, and that very well explains the wakeups Tvrtko measured. > I think it should come from the fact that accesses are likely to be > bursty, having a bimodal distribution in the time domain. 
When we've > made one access, we're likely to make another much sooner than 1ms > later, but once we stop making accesses there will probably be far > more than 1ms before the next set of accesses. Yes. We expect temporal coherence in the access pattern. That's assuredly so since we very rarely access one register by itself. > One more idea - do we want two different expiry periods depending on > whether the caller used the _auto version or not? Hard to tell. Broadly speaking we have two users of the non-auto interface: userspace disabling forcewake indefinitely, and the other is code that wants to hold the spinlock + forcewake over a bunch of register writes. > If they did, they're probably anticipating "only a few" accesses in > the current operation, but perhaps with no idea about when the next > set of accesses will occur. > > If not, then the final decrement means they think they've finished > for a while, and perhaps don't expect to come back for quite a long > time. In that situation the timeout only helps avoid rapid off/on > cycles by unrelated functions, not between logically-related > operations. Execlists. (Wants to hold forcewake over a bunch of reads and writes, and may be very frequent context switches or very slow) > So would two different timeouts make sense? Or even ... > > _auto: longish timeout from the *first* _get() - don't rearm thereafter > other: shortish timeout from the _put() - non-auto _get() will cancel > > or is that just too complicated? The challenge is that I think we should be pushing for shorter timeouts, because of the bimodality in many accesses followed by a long period of sleep. One of the original reasons for the choice of 1 jiffie with a pinned timer was that it should mean that on the next call to schedule() we should be deasserting forcewake (i.e. we only hold forcewake for this sequence of register accesses). However, execlists scuppers that by having an interrupt context that requires forcewake, and frequently so. 
So I think we do need a combination of something like task_work_run() to tie a forcewake reference to the active task with something like an extra timer to allow frequent wakers to avoid having to retake forcewake. -Chris
On 04/04/16 19:58, Chris Wilson wrote: > On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Current implementation releases the forcewake at any time between >> straight away, and one jiffie from the last put, or first automatic >> grab. > > That isn't the problem though. The problem is that we set the timer on > first use rather than last use. All you are stating here is that by > lengthening the timeout on your system you reduce the number of times it > times out. It is true the reduction I see is due to lengthening of the average timeout. But with regards to re-arming approach, I thought so initially myself, but then, if we expect bursty access then it shouldn't matter and the simplicity of doing it like it currently is better. Even in practice, I noticed the effect of lengthening the timeout is much greater than moving the arming to the last access. And to get to very few to none auto releases on busy workloads we need in the regions of 5ms, which would be a big change. Or maybe not if you consider HZ=100 kernels. It is very difficult to know what is actually better considering power between the CPU and GPU and performance. So I thought the best would be to keep it similar to the current timings, just fix the dependency on HZ and also slack with hrtimers might help with something. As a final data point, explicit puts and auto releases seem to be relatively balanced in my testing. With this patch T-Rex for example auto-releases in the region of 3-4 times in 10ms, with around 5-10 explicit gets, and 5-10 implicit gets in 10ms. A different, interrupt heavy workload (~20k irqs/sec) manages similarly 2-4 auto-releases per 10ms, and has ~250 explicit gets and ~380 implicit per 10ms. Looking at the two I think the fact they manage to auto-release relatively similarly, compared to a huge difference in fw gets, suggests burstiness. 
So I am not sure that any smarts with the release period would be interesting. At least not without serious power/perf measurements. > Having said that, the conversion to hrtimer seems sensible but to add > tracking of the last access as opposed to first we either fallback to > jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being > fast enough for every register write. Hmm, my usual response to that has > been if it matters we avoid the heavyweight macros and use the _FW > interface - or even raw readl/writel. > > Could you try storing ktime_get_raw() on every access and rearming the > timer if it expires before last-access + min-period? That would be similar to your patch from before my holiday, right? As I said above, I did not notice much change with that approach. Just extending the timeout has a much greater effect, but as I wrote above, I am not sure we want to change it. Regards, Tvrtko
On Tue, Apr 05, 2016 at 09:54:58AM +0100, Tvrtko Ursulin wrote: > > On 04/04/16 19:58, Chris Wilson wrote: > >On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >>Current implementation releases the forcewake at any time between > >>straight away, and one jiffie from the last put, or first automatic > >>grab. > > > >That isn't the problem though. The problem is that we set the timer on > >first use rather than last use. All you are stating here is that by > >lengthening the timeout on your system you reduce the number of times it > >times out. > > It is true the reduction I see is due lengthening of the average timeout. > > But with regards to re-arming approach, I thought so initially > myself, but then, if we expect bursty access then it shouldn't > matter and the simplicity of doing it like it currently is better. > > Even in practice, I noticed the effect of lengthening the timeout is > much greater than moving the arming to the last access. And to get > to very few to none auto releases on busy workloads we need in the > regions of 5ms, which would be a big change. Or maybe not if you > consider HZ=100 kernels. > > It is very difficult to know what is actually better considering > power between the CPU and GPU and performance. So I though the best > would be to keep it similar to the current timings, just fix the > dependency on HZ and also slack with hrtimers might help with > something. > > As a final data point, explicit puts and auto releases seems to be > relatively balanced in my testing. With this patch T-Rex for example > auto-releases in the region of 3-4 times in 10ms, with around 5-10 > explicit gets, and 5-10 implicit gets in 10ms. > > A different, interrupt heavy workload (~20k irqs/sec) manages > similarly 2-4 auto-releases per 10ms, and has ~250 explicit gets and > ~380 implicit per 10ms. 
> > Looking at the two I think the fact they manage to auto-release > relatively similarly, compared to a huge different in fw gets, > suggest burstyness. So I am not sure that any smarts with the > release period would be interesting. At least not without serious > power/perf measurements. > > >Having said that, the conversion to hrtimer seems sensible but to add > >tracking of the last access as opposed to first we either fallback to > >jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being > >fast enough for every register write. Hmm, my usual response to that has > >been if it matters we avoid the heavyweight macros and use the _FW > >interface - or even raw readl/writel. > > > >Could you try storing ktime_get_raw() on every access and rearming the > >timer if it expires before last-access + min-period? > > That would be similar to your patch from before my holiday, right? > As I said above, I did not notice much change with that approach. > Just extending the timeout has a much greater effect, but as I wrote > above, I am not sure we want to change it. There was just one bug in that patch in checking the expiration that makes a huge difference, but if you please add the discussion above to the commit log that would be invaluable. -Chris
On 05/04/16 09:59, Chris Wilson wrote: > On Tue, Apr 05, 2016 at 09:54:58AM +0100, Tvrtko Ursulin wrote: >> >> On 04/04/16 19:58, Chris Wilson wrote: >>> On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: >>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>>> >>>> Current implementation releases the forcewake at any time between >>>> straight away, and one jiffie from the last put, or first automatic >>>> grab. >>> >>> That isn't the problem though. The problem is that we set the timer on >>> first use rather than last use. All you are stating here is that by >>> lengthening the timeout on your system you reduce the number of times it >>> times out. >> >> It is true the reduction I see is due lengthening of the average timeout. >> >> But with regards to re-arming approach, I thought so initially >> myself, but then, if we expect bursty access then it shouldn't >> matter and the simplicity of doing it like it currently is better. >> >> Even in practice, I noticed the effect of lengthening the timeout is >> much greater than moving the arming to the last access. And to get >> to very few to none auto releases on busy workloads we need in the >> regions of 5ms, which would be a big change. Or maybe not if you >> consider HZ=100 kernels. >> >> It is very difficult to know what is actually better considering >> power between the CPU and GPU and performance. So I though the best >> would be to keep it similar to the current timings, just fix the >> dependency on HZ and also slack with hrtimers might help with >> something. >> >> As a final data point, explicit puts and auto releases seems to be >> relatively balanced in my testing. With this patch T-Rex for example >> auto-releases in the region of 3-4 times in 10ms, with around 5-10 >> explicit gets, and 5-10 implicit gets in 10ms. >> >> A different, interrupt heavy workload (~20k irqs/sec) manages >> similarly 2-4 auto-releases per 10ms, and has ~250 explicit gets and >> ~380 implicit per 10ms. 
>> >> Looking at the two I think the fact they manage to auto-release >> relatively similarly, compared to a huge different in fw gets, >> suggest burstyness. So I am not sure that any smarts with the >> release period would be interesting. At least not without serious >> power/perf measurements. >> >>> Having said that, the conversion to hrtimer seems sensible but to add >>> tracking of the last access as opposed to first we either fallback to >>> jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being >>> fast enough for every register write. Hmm, my usual response to that has >>> been if it matters we avoid the heavyweight macros and use the _FW >>> interface - or even raw readl/writel. >>> >>> Could you try storing ktime_get_raw() on every access and rearming the >>> timer if it expires before last-access + min-period? >> >> That would be similar to your patch from before my holiday, right? >> As I said above, I did not notice much change with that approach. >> Just extending the timeout has a much greater effect, but as I wrote >> above, I am not sure we want to change it. > > There was just one bug in that patch in checking the expiration that > makes a huge difference, but if you please add the discussion above to > the commit log that would be invaluable. I think I had a fixed version, would have to dig in branches now. I will add more text to the commit. Most importantly, do you think this would affect HZ=10 or HZ=250 kernels at all? Presumably if it could, someone would know today that there is a difference between the kernels based on HZ already? Regards, Tvrtko
On Tue, Apr 05, 2016 at 11:02:15AM +0100, Tvrtko Ursulin wrote: > > On 05/04/16 09:59, Chris Wilson wrote: > >On Tue, Apr 05, 2016 at 09:54:58AM +0100, Tvrtko Ursulin wrote: > >> > >>On 04/04/16 19:58, Chris Wilson wrote: > >>>On Mon, Apr 04, 2016 at 05:51:09PM +0100, Tvrtko Ursulin wrote: > >>>>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >>>> > >>>>Current implementation releases the forcewake at any time between > >>>>straight away, and one jiffie from the last put, or first automatic > >>>>grab. > >>> > >>>That isn't the problem though. The problem is that we set the timer on > >>>first use rather than last use. All you are stating here is that by > >>>lengthening the timeout on your system you reduce the number of times it > >>>times out. > >> > >>It is true the reduction I see is due lengthening of the average timeout. > >> > >>But with regards to re-arming approach, I thought so initially > >>myself, but then, if we expect bursty access then it shouldn't > >>matter and the simplicity of doing it like it currently is better. > >> > >>Even in practice, I noticed the effect of lengthening the timeout is > >>much greater than moving the arming to the last access. And to get > >>to very few to none auto releases on busy workloads we need in the > >>regions of 5ms, which would be a big change. Or maybe not if you > >>consider HZ=100 kernels. > >> > >>It is very difficult to know what is actually better considering > >>power between the CPU and GPU and performance. So I though the best > >>would be to keep it similar to the current timings, just fix the > >>dependency on HZ and also slack with hrtimers might help with > >>something. > >> > >>As a final data point, explicit puts and auto releases seems to be > >>relatively balanced in my testing. With this patch T-Rex for example > >>auto-releases in the region of 3-4 times in 10ms, with around 5-10 > >>explicit gets, and 5-10 implicit gets in 10ms. 
> >> > >>A different, interrupt heavy workload (~20k irqs/sec) manages > >>similarly 2-4 auto-releases per 10ms, and has ~250 explicit gets and > >>~380 implicit per 10ms. > >> > >>Looking at the two I think the fact they manage to auto-release > >>relatively similarly, compared to a huge different in fw gets, > >>suggest burstyness. So I am not sure that any smarts with the > >>release period would be interesting. At least not without serious > >>power/perf measurements. > >> > >>>Having said that, the conversion to hrtimer seems sensible but to add > >>>tracking of the last access as opposed to first we either fallback to > >>>jiffie (in which case hrtimer is moot) or rely on ktime_get_raw() being > >>>fast enough for every register write. Hmm, my usual response to that has > >>>been if it matters we avoid the heavyweight macros and use the _FW > >>>interface - or even raw readl/writel. > >>> > >>>Could you try storing ktime_get_raw() on every access and rearming the > >>>timer if it expires before last-access + min-period? > >> > >>That would be similar to your patch from before my holiday, right? > >>As I said above, I did not notice much change with that approach. > >>Just extending the timeout has a much greater effect, but as I wrote > >>above, I am not sure we want to change it. > > > >There was just one bug in that patch in checking the expiration that > >makes a huge difference, but if you please add the discussion above to > >the commit log that would be invaluable. > > I think I had a fixed version, would have to dig in branches now. > > I will add more text to the commit. > > Most importantly, do you think this would affect HZ=10 or HZ=250 > kernels at all? Presumably if it could, someone would know today > that there is a difference between the kernels based on HZ already? Hmm, I switch quite regularly - that may explain why sometimes I see fw_domains_get quite heavy in the profiles and other. But I have never correlated that with changing .configs. 
And I don't keep stats for rc6 during stress tests - especially more notable is the lack of light desktop load testing. One interesting side-effect of hrtimer is that iirc it is installed on the local cpu and isn't run until we hit a preemption point (i.e. the scheduler in my case) so for those who are running high latency low HZ are likely unaffected at all! As far as hrtimer is concerned, my only real worry is that if it is too expensive - but since it is being increasingly widely used I shouldn't worry myself too much about that (if it were to be a problem, it will get fixed!). -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd187727c813..7d4c704d7d75 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -666,7 +666,7 @@ struct intel_uncore { struct drm_i915_private *i915; enum forcewake_domain_id id; unsigned wake_count; - struct timer_list timer; + struct hrtimer timer; i915_reg_t reg_set; u32 val_set; u32 val_clear; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index ac1c545436af..76ac990de354 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -60,7 +60,11 @@ fw_domain_reset(const struct intel_uncore_forcewake_domain *d) static inline void fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d) { - mod_timer_pinned(&d->timer, jiffies + 1); + d->wake_count++; + hrtimer_start_range_ns(&d->timer, + ktime_set(0, NSEC_PER_MSEC), + NSEC_PER_MSEC, + HRTIMER_MODE_REL); } static inline void @@ -224,9 +228,11 @@ static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv) return ret; } -static void intel_uncore_fw_release_timer(unsigned long arg) +static enum hrtimer_restart +intel_uncore_fw_release_timer(struct hrtimer *timer) { - struct intel_uncore_forcewake_domain *domain = (void *)arg; + struct intel_uncore_forcewake_domain *domain = + container_of(timer, struct intel_uncore_forcewake_domain, timer); unsigned long irqflags; assert_rpm_device_not_suspended(domain->i915); @@ -240,6 +246,8 @@ static void intel_uncore_fw_release_timer(unsigned long arg) 1 << domain->id); spin_unlock_irqrestore(&domain->i915->uncore.lock, irqflags); + + return HRTIMER_NORESTART; } void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore) @@ -259,16 +267,16 @@ void intel_uncore_forcewake_reset(struct drm_device *dev, bool restore) active_domains = 0; for_each_fw_domain(domain, dev_priv, id) { - if (del_timer_sync(&domain->timer) == 0) + if (hrtimer_cancel(&domain->timer) == 0) 
continue; - intel_uncore_fw_release_timer((unsigned long)domain); + intel_uncore_fw_release_timer(&domain->timer); } spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); for_each_fw_domain(domain, dev_priv, id) { - if (timer_pending(&domain->timer)) + if (hrtimer_active(&domain->timer)) active_domains |= (1 << id); } @@ -491,7 +499,6 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, if (--domain->wake_count) continue; - domain->wake_count++; fw_domain_arm_timer(domain); } } @@ -732,7 +739,6 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, continue; } - domain->wake_count++; fw_domain_arm_timer(domain); } @@ -1150,7 +1156,8 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, d->i915 = dev_priv; d->id = domain_id; - setup_timer(&d->timer, intel_uncore_fw_release_timer, (unsigned long)d); + hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + d->timer.function = intel_uncore_fw_release_timer; dev_priv->uncore.fw_domains |= (1 << domain_id);