diff mbox

drm/i915: Fallback to reserve forcewake if primary ack missing

Message ID 20171027140421.15752-1-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Oct. 27, 2017, 2:04 p.m. UTC
There is a possibility on gen9 hardware to miss the forcewake ack
message. The recommended workaround is to use another free
bit and toggle it until the original bit is successfully acknowledged.

Some future gen9 revs might or might not fix the underlying issue, but
the fallback to the reserve bit dance can be considered harmless:
without the ack timeout we never reach the reserve bit forcewake.
Thus, as of now, we adopt a blanket approach for all gen9 and leave
bypassing the reserve bit approach to future patches, should
corresponding hw revisions appear.

Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
for forcewake request/clear ack") did increase the forcewake timeout.
If the issue was a delayed ack, future work could include finding
a suitable timeout value both for primary ack and reserve toggle
to reduce the worst case latency.

v2: use bit 15, naming, comment (Chris), only wait fallback ack

References: HSDES #1604254524
References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |   5 +-
 drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
 2 files changed, 134 insertions(+), 12 deletions(-)

Comments

Mika Kuoppala Oct. 27, 2017, 2:08 p.m. UTC | #1
Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
>
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
>
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
>
> v2: use bit 15, naming, comment (Chris), only wait fallback ack
>

And then I forgot to change the naming in the subject and
commit message. Sigh
-Mika


> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>  drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
>  2 files changed, 134 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8c775e96b4e4..f0f8f6059652 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
>  #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
>  #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
>  #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
> -#define   FORCEWAKE_KERNEL			0x1
> -#define   FORCEWAKE_USER			0x2
> +#define   FORCEWAKE_KERNEL			BIT(0)
> +#define   FORCEWAKE_USER			BIT(1)
> +#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
>  #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
>  #define  ECOBUS					_MMIO(0xa180)
>  #define    FORCEWAKE_MT_ENABLE			(1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 96ee6b2754be..588ae35c8c9a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,108 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>  			       HRTIMER_MODE_REL);
>  }
>  
> +static inline int
> +__wait_for_ack(const struct drm_i915_private *i915,
> +	       const struct intel_uncore_forcewake_domain *d,
> +	       const u32 ack,
> +	       const u32 value)
> +{
> +	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
> +			       FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +
> +static inline int
> +wait_ack_clear(const struct drm_i915_private *i915,
> +	       const struct intel_uncore_forcewake_domain *d,
> +	       const u32 ack)
> +{
> +	return __wait_for_ack(i915, d, ack, 0);
> +}
> +
> +static inline int
> +wait_ack_set(const struct drm_i915_private *i915,
> +	     const struct intel_uncore_forcewake_domain *d,
> +	     const u32 ack)
> +{
> +	return __wait_for_ack(i915, d, ack, ack);
> +}
> +
>  static inline void
>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>  			 const struct intel_uncore_forcewake_domain *d)
>  {
> -	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -			     FORCEWAKE_KERNEL) == 0,
> -			    FORCEWAKE_ACK_TIMEOUT_MS))
> +	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>  		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>  			  intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
> +static int
> +wait_ack_with_fallback(const struct drm_i915_private *i915,
> +		       const struct intel_uncore_forcewake_domain *d,
> +		       const u32 ack,
> +		       const u32 value)
> +{
> +	int ret;
> +
> +	/*
> +	 * There is a possibility of driver's wake request colliding
> +	 * with hardware's own wake requests and that can cause
> +	 * hardware to not deliver the driver's ack message.
> +	 *
> +	 * Use a fallback bit toggle to kick the gpu state machine
> +	 * in hopes that the original ack will be delivered along with
> +	 * the fallback ack.
> +	 *
> +	 * This workaround is described in HSDES #1604254524
> +	 */
> +
> +	wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +	__raw_i915_write32(i915, d->reg_set,
> +			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
> +	wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +
> +	ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
> +
> +	__raw_i915_write32(i915, d->reg_set,
> +			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
> +
> +	return ret;
> +}
> +
> +enum ack_type {
> +	ACK_CLEAR = 0,
> +	ACK_SET
> +};
> +
> +static int
> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
> +				 const struct intel_uncore_forcewake_domain *d,
> +				 const enum ack_type type)
> +{
> +	int retry = 10;
> +	int ret;
> +
> +	do {
> +		ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
> +					     type == ACK_SET ?
> +					     FORCEWAKE_KERNEL : 0);
> +	} while (ret && --retry);
> +
> +	return ret;
> +}
> +
> +static inline void
> +fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915,
> +				  const struct intel_uncore_forcewake_domain *d)
> +{
> +	if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL)))
> +		return;
> +
> +	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR))
> +		fw_domain_wait_ack_clear(i915, d);
> +}
> +
>  static inline void
>  fw_domain_get(struct drm_i915_private *i915,
>  	      const struct intel_uncore_forcewake_domain *d)
> @@ -88,17 +179,26 @@ fw_domain_get(struct drm_i915_private *i915,
>  }
>  
>  static inline void
> -fw_domain_wait_ack(const struct drm_i915_private *i915,
> -		   const struct intel_uncore_forcewake_domain *d)
> +fw_domain_wait_ack_set(const struct drm_i915_private *i915,
> +		       const struct intel_uncore_forcewake_domain *d)
>  {
> -	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -			     FORCEWAKE_KERNEL),
> -			    FORCEWAKE_ACK_TIMEOUT_MS))
> +	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
>  		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
>  			  intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
>  static inline void
> +fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915,
> +				const struct intel_uncore_forcewake_domain *d)
> +{
> +	if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL)))
> +		return;
> +
> +	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET))
> +		fw_domain_wait_ack_set(i915, d);
> +}
> +
> +static inline void
>  fw_domain_put(const struct drm_i915_private *i915,
>  	      const struct intel_uncore_forcewake_domain *d)
>  {
> @@ -119,7 +219,27 @@ fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
>  	}
>  
>  	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> -		fw_domain_wait_ack(i915, d);
> +		fw_domain_wait_ack_set(i915, d);
> +
> +	i915->uncore.fw_domains_active |= fw_domains;
> +}
> +
> +static void
> +fw_domains_get_with_fallback(struct drm_i915_private *i915,
> +			     enum forcewake_domains fw_domains)
> +{
> +	struct intel_uncore_forcewake_domain *d;
> +	unsigned int tmp;
> +
> +	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
> +
> +	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
> +		fw_domain_wait_ack_clear_fallback(i915, d);
> +		fw_domain_get(i915, d);
> +	}
> +
> +	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
> +		fw_domain_wait_ack_set_fallback(i915, d);
>  
>  	i915->uncore.fw_domains_active |= fw_domains;
>  }
> @@ -1142,7 +1262,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
>  	}
>  
>  	if (INTEL_GEN(dev_priv) >= 9) {
> -		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
> +		dev_priv->uncore.funcs.force_wake_get =
> +			fw_domains_get_with_fallback;
>  		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
>  		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
>  			       FORCEWAKE_RENDER_GEN9,
> -- 
> 2.11.0
Chris Wilson Oct. 27, 2017, 2:38 p.m. UTC | #2
Quoting Mika Kuoppala (2017-10-27 15:04:21)
> There is a possibility on gen9 hardware to miss the forcewake ack
> message. The recommended workaround is to use another free
> bit and toggle it until original bit is successfully acknowledged.
> 
> Some future gen9 revs might or might not fix the underlying issue but
> the fallback to reserve bit dance can be considered as harmless:
> without the ack timeout we never reach the reserve bit forcewake.
> Thus as of now we adopt a blanket approach for all gen9 and leave
> the bypassing the reserve bit approach for future patches if
> corresponding hw revisions do appear.
> 
> Commit 83e3337204b2 ("drm/i915: Increase maximum polling time to 50ms
> for forcewake request/clear ack") did increase the forcewake timeout.
> If the issue was a delayed ack, future work could include finding
> a suitable timeout value both for primary ack and reserve toggle
> to reduce the worst case latency.
> 
> v2: use bit 15, naming, comment (Chris), only wait fallback ack
> 
> References: HSDES #1604254524
> References: https://bugs.freedesktop.org/show_bug.cgi?id=102051
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |   5 +-
>  drivers/gpu/drm/i915/intel_uncore.c | 141 +++++++++++++++++++++++++++++++++---
>  2 files changed, 134 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8c775e96b4e4..f0f8f6059652 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7774,8 +7774,9 @@ enum {
>  #define  FORCEWAKE_ACK_MEDIA_GEN9              _MMIO(0x0D88)
>  #define  FORCEWAKE_ACK_RENDER_GEN9             _MMIO(0x0D84)
>  #define  FORCEWAKE_ACK_BLITTER_GEN9            _MMIO(0x130044)
> -#define   FORCEWAKE_KERNEL                     0x1
> -#define   FORCEWAKE_USER                       0x2
> +#define   FORCEWAKE_KERNEL                     BIT(0)
> +#define   FORCEWAKE_USER                       BIT(1)
> +#define   FORCEWAKE_KERNEL_FALLBACK            BIT(15)
>  #define  FORCEWAKE_MT_ACK                      _MMIO(0x130040)
>  #define  ECOBUS                                        _MMIO(0xa180)
>  #define    FORCEWAKE_MT_ENABLE                 (1<<5)
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 96ee6b2754be..588ae35c8c9a 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -69,17 +69,108 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
>                                HRTIMER_MODE_REL);
>  }
>  
> +static inline int
> +__wait_for_ack(const struct drm_i915_private *i915,
> +              const struct intel_uncore_forcewake_domain *d,
> +              const u32 ack,
> +              const u32 value)
> +{
> +       return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
> +                              FORCEWAKE_ACK_TIMEOUT_MS);
> +}
> +
> +

Double \n

> +static inline int
> +wait_ack_clear(const struct drm_i915_private *i915,
> +              const struct intel_uncore_forcewake_domain *d,
> +              const u32 ack)
> +{
> +       return __wait_for_ack(i915, d, ack, 0);
> +}
> +
> +static inline int
> +wait_ack_set(const struct drm_i915_private *i915,
> +            const struct intel_uncore_forcewake_domain *d,
> +            const u32 ack)
> +{
> +       return __wait_for_ack(i915, d, ack, ack);
> +}
> +
>  static inline void
>  fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
>                          const struct intel_uncore_forcewake_domain *d)
>  {
> -       if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
> -                            FORCEWAKE_KERNEL) == 0,
> -                           FORCEWAKE_ACK_TIMEOUT_MS))
> +       if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
>                 DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
>                           intel_uncore_forcewake_domain_to_str(d->id));
>  }
>  
> +static int
> +wait_ack_with_fallback(const struct drm_i915_private *i915,
> +                      const struct intel_uncore_forcewake_domain *d,
> +                      const u32 ack,
> +                      const u32 value)
> +{
> +       int ret;
> +
> +       /*
> +        * There is a possibility of driver's wake request colliding
> +        * with hardware's own wake requests and that can cause
> +        * hardware to not deliver the driver's ack message.
> +        *
> +        * Use a fallback bit toggle to kick the gpu state machine
> +        * in hopes that the original ack will be delivered along with
> +        * the fallback ack.
> +        *
> +        * This workaround is described in HSDES #1604254524
> +        */
> +
> +       wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
> +       __raw_i915_write32(i915, d->reg_set,
> +                          _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
> +       wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);

My mind boggles when it comes to thinking how this interacts with a
second failure of the same type.  For that level of paranoia I think
you really do need to recurse onto the next fallback bit... (And then
unwind the acks all the way back.) If the bug happens twice, the
ack_clear following the clear is shortcircuited, so the subsequent
write+ack_set is ill-defined, and may not even be noticed.

Ok, I can see how this would work if the register was updated on edge
events, and one of those went missing. On the second ack, the refresh of
the register would see both bits set.

I think you want to insert a variable udelay (say 10*pass) between the
FALLBACK write and wait_ack_set(). I would also move the loops together
so that the pass number is known there.

> +
> +       ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
> +
> +       __raw_i915_write32(i915, d->reg_set,
> +                          _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
> +
> +       return ret;
> +}
> +
> +enum ack_type {
> +       ACK_CLEAR = 0,
> +       ACK_SET
> +};
> +
> +static int
> +fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
> +                                const struct intel_uncore_forcewake_domain *d,
> +                                const enum ack_type type)
> +{
> +       int retry = 10;
> +       int ret;
> +
> +       do {
> +               ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
> +                                            type == ACK_SET ?
> +                                            FORCEWAKE_KERNEL : 0);

/me channeling Joonas

Stick that ternary into a local for readability.

> +       } while (ret && --retry);

Since this is the fallback path, we can afford a DRM_DEBUG_DRIVER here.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8c775e96b4e4..f0f8f6059652 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7774,8 +7774,9 @@  enum {
 #define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
 #define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
 #define  FORCEWAKE_ACK_BLITTER_GEN9		_MMIO(0x130044)
-#define   FORCEWAKE_KERNEL			0x1
-#define   FORCEWAKE_USER			0x2
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
 #define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
 #define  ECOBUS					_MMIO(0xa180)
 #define    FORCEWAKE_MT_ENABLE			(1<<5)
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 96ee6b2754be..588ae35c8c9a 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -69,17 +69,108 @@  fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 			       HRTIMER_MODE_REL);
 }
 
+static inline int
+__wait_for_ack(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack,
+	       const u32 value)
+{
+	return wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) & ack) == value,
+			       FORCEWAKE_ACK_TIMEOUT_MS);
+}
+
+
+static inline int
+wait_ack_clear(const struct drm_i915_private *i915,
+	       const struct intel_uncore_forcewake_domain *d,
+	       const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, 0);
+}
+
+static inline int
+wait_ack_set(const struct drm_i915_private *i915,
+	     const struct intel_uncore_forcewake_domain *d,
+	     const u32 ack)
+{
+	return __wait_for_ack(i915, d, ack, ack);
+}
+
 static inline void
 fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
 			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_clear(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
+static int
+wait_ack_with_fallback(const struct drm_i915_private *i915,
+		       const struct intel_uncore_forcewake_domain *d,
+		       const u32 ack,
+		       const u32 value)
+{
+	int ret;
+
+	/*
+	 * There is a possibility of driver's wake request colliding
+	 * with hardware's own wake requests and that can cause
+	 * hardware to not deliver the driver's ack message.
+	 *
+	 * Use a fallback bit toggle to kick the gpu state machine
+	 * in hopes that the original ack will be delivered along with
+	 * the fallback ack.
+	 *
+	 * This workaround is described in HSDES #1604254524
+	 */
+
+	wait_ack_clear(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+	__raw_i915_write32(i915, d->reg_set,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL_FALLBACK));
+	wait_ack_set(i915, d, FORCEWAKE_KERNEL_FALLBACK);
+
+	ret = (__raw_i915_read32(i915, d->reg_ack) & ack) == value;
+
+	__raw_i915_write32(i915, d->reg_set,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL_FALLBACK));
+
+	return ret;
+}
+
+enum ack_type {
+	ACK_CLEAR = 0,
+	ACK_SET
+};
+
+static int
+fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915,
+				 const struct intel_uncore_forcewake_domain *d,
+				 const enum ack_type type)
+{
+	int retry = 10;
+	int ret;
+
+	do {
+		ret = wait_ack_with_fallback(i915, d, FORCEWAKE_KERNEL,
+					     type == ACK_SET ?
+					     FORCEWAKE_KERNEL : 0);
+	} while (ret && --retry);
+
+	return ret;
+}
+
+static inline void
+fw_domain_wait_ack_clear_fallback(const struct drm_i915_private *i915,
+				  const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_clear(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_CLEAR))
+		fw_domain_wait_ack_clear(i915, d);
+}
+
 static inline void
 fw_domain_get(struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
@@ -88,17 +179,26 @@  fw_domain_get(struct drm_i915_private *i915,
 }
 
 static inline void
-fw_domain_wait_ack(const struct drm_i915_private *i915,
-		   const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack_set(const struct drm_i915_private *i915,
+		       const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
+	if (wait_ack_set(i915, d, FORCEWAKE_KERNEL))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
 }
 
 static inline void
+fw_domain_wait_ack_set_fallback(const struct drm_i915_private *i915,
+				const struct intel_uncore_forcewake_domain *d)
+{
+	if (likely(!wait_ack_set(i915, d, FORCEWAKE_KERNEL)))
+		return;
+
+	if (fw_domain_wait_ack_with_fallback(i915, d, ACK_SET))
+		fw_domain_wait_ack_set(i915, d);
+}
+
+static inline void
 fw_domain_put(const struct drm_i915_private *i915,
 	      const struct intel_uncore_forcewake_domain *d)
 {
@@ -119,7 +219,27 @@  fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 	}
 
 	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
-		fw_domain_wait_ack(i915, d);
+		fw_domain_wait_ack_set(i915, d);
+
+	i915->uncore.fw_domains_active |= fw_domains;
+}
+
+static void
+fw_domains_get_with_fallback(struct drm_i915_private *i915,
+			     enum forcewake_domains fw_domains)
+{
+	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
+
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear_fallback(i915, d);
+		fw_domain_get(i915, d);
+	}
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack_set_fallback(i915, d);
 
 	i915->uncore.fw_domains_active |= fw_domains;
 }
@@ -1142,7 +1262,8 @@  static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_fallback;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,