diff mbox

drm/i915: Only grab correct forcewake for the engine with execlists

Message ID 1459954195-19807-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 6, 2016, 2:49 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Rather than blindly waking up all forcewake domains on command
submission, we can teach each engine what is (or are) the correct
one to take.

On platforms with multiple forcewake domains like VLV, CHV, SKL
and BXT, this has the potential of lowering the GPU and CPU power
use and submission latency.

To implement it, we extract the code which holds the built in
knowledge on which forcewake domains are applicable for each
registers from the MMIO accessors into separate macros, and
implement two new functions named intel_reg_read_fw_domains and
intel_reg_read_fw_domains.

These enables callers, in this case the execlists engine setup
code, to query which forcewake domains are relevant per engine
on the currently running platform.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |   6 +
 drivers/gpu/drm/i915/intel_lrc.c        |  18 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
 drivers/gpu/drm/i915/intel_uncore.c     | 306 +++++++++++++++++++++-----------
 4 files changed, 226 insertions(+), 105 deletions(-)

Comments

Chris Wilson April 6, 2016, 3:12 p.m. UTC | #1
On Wed, Apr 06, 2016 at 03:49:55PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Rather than blindly waking up all forcewake domains on command
> submission, we can teach each engine what is (or are) the correct
> one to take.
> 
> On platforms with multiple forcewake domains like VLV, CHV, SKL
> and BXT, this has the potential of lowering the GPU and CPU power
> use and submission latency.
> 
> To implement it, we extract the code which holds the built in
> knowledge on which forcewake domains are applicable for each
> registers from the MMIO accessors into separate macros, and
> implement two new functions named intel_reg_read_fw_domains and
> intel_reg_read_fw_domains.
> 
> These enables callers, in this case the execlists engine setup
> code, to query which forcewake domains are relevant per engine
> on the currently running platform.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |   6 +
>  drivers/gpu/drm/i915/intel_lrc.c        |  18 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   1 +
>  drivers/gpu/drm/i915/intel_uncore.c     | 306 +++++++++++++++++++++-----------
>  4 files changed, 226 insertions(+), 105 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a1f78f275c55..311217e7f917 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -633,6 +633,12 @@ enum forcewake_domains {
>  			 FORCEWAKE_MEDIA)
>  };
>  
> +enum forcewake_domains
> +intel_reg_read_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg);
> +
> +enum forcewake_domains
> +intel_reg_write_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg);
> +
>  struct intel_uncore_funcs {
>  	void (*force_wake_get)(struct drm_i915_private *dev_priv,
>  							enum forcewake_domains domains);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a1db6a02cf23..cac387f38cf6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -418,6 +418,7 @@ static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
>  				      struct drm_i915_gem_request *rq1)
>  {
>  	struct drm_i915_private *dev_priv = rq0->i915;
> +	unsigned int fw_domains = rq0->engine->fw_domains_elsp;

So I was thinking this was overly specific and that no hw engineer would
ever split the block of ring registers into separate power domains, and
then I realised they already had with the shadow_regs.

The only other place where we could make semantic use of this is in
intel_ringbuffer.c:init_ring_common. We don't make use of the block fw
put/get anywhere else - the only other candidate I can think of right
now would be gen6 bsd legacy tail writes. That is a block of register
writes were marking the intent to stay awake through the entire sequence
would be worth it.

> +#define __gen6_reg_read_fw_domains(offset) \
> +({ \
> +	enum forcewake_domains __fwd; \
> +	if (NEEDS_FORCE_WAKE(offset)) \
> +		__fwd = FORCEWAKE_RENDER; \
> +	else \
> +		__fwd = 0; \
> +	__fwd; \
> +})

[snip]
This split out looks right, but it is probably worth doing as a seperate
step and checking for changes in objdump. Just to be paranoid that we
didn't make an unexpected modification.

> +enum forcewake_domains
> +intel_reg_read_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg)
> +{
> +	if (intel_vgpu_active(dev_priv->dev))
> +		return 0;
> +
> +	switch (INTEL_INFO(dev_priv)->gen) {
> +	case 9:
> +		return __gen9_reg_read_fw_domains(i915_mmio_reg_offset(reg));
> +	case 8:
> +		if (IS_CHERRYVIEW(dev_priv))
> +			return __chv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
> +		else
> +			return __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
> +	case 7:
> +	case 6:
> +		if (IS_VALLEYVIEW(dev_priv))
> +			return __vlv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
> +		else
> +			return __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
> +	default:
> +		/* It is a bug to think about forcewake on older platforms. */

Yes, but we might as well report them correctly as 0 and not throw a
warning. You are calling this an intel_() function, and not gen6_()
after all (that is inviting everybody to use it).

default:
> +		MISSING_CASE(INTEL_INFO(dev_priv)->gen);

case 5: /* forcewake was introduced with gen6 */
case 4:
case 3:
case 2:

> +		return 0;

A nice touch here would be to
WARN_ON(fw_domains & ~dev_priv->uncore.forcewake_domains);

R-b on the engine->fw_domains_*, a-b on the split, but let's try to use
the compiler to our advantage on that step.
-Chris
Chris Wilson April 6, 2016, 3:33 p.m. UTC | #2
On Wed, Apr 06, 2016 at 03:29:37PM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: drm/i915: Only grab correct forcewake for the engine with execlists
> URL   : https://patchwork.freedesktop.org/series/5375/
> State : success
> 
> == Summary ==
> 
> Series 5375v1 drm/i915: Only grab correct forcewake for the engine with execlists
> http://patchwork.freedesktop.org/api/1.0/series/5375/revisions/1/mbox/
> 
> Test drv_module_reload_basic:
>                 dmesg-warn -> PASS       (bsw-nuc-2)
> Test gem_ctx_param_basic:
>         Subgroup invalid-ctx-set:
>                 dmesg-warn -> PASS       (bsw-nuc-2)
> Test gem_storedw_loop:
>         Subgroup basic-blt:
>                 dmesg-fail -> PASS       (bsw-nuc-2)
>         Subgroup basic-bsd:
>                 skip       -> PASS       (bsw-nuc-2)
> Test kms_addfb_basic:
>         Subgroup bad-pitch-128:
>                 dmesg-warn -> PASS       (bsw-nuc-2)
> Test kms_flip:
>         Subgroup basic-flip-vs-dpms:
>                 dmesg-warn -> PASS       (bsw-nuc-2)
>         Subgroup basic-flip-vs-modeset:
>                 dmesg-warn -> PASS       (ilk-hp8440p) UNSTABLE
> Test kms_pipe_crc_basic:
>         Subgroup suspend-read-crc-pipe-b:
>                 incomplete -> PASS       (hsw-gt2)

Hmm, intriguing. I wonder if fortune holds if we send the patch again...
-Chris
Chris Wilson April 6, 2016, 11:45 p.m. UTC | #3
On Wed, Apr 06, 2016 at 03:49:55PM +0100, Tvrtko Ursulin wrote:
> +static const i915_reg_t gen9_shadowed_regs[] = {
> +	RING_TAIL(RENDER_RING_BASE),
> +	RING_TAIL(GEN6_BSD_RING_BASE),
> +	RING_TAIL(VEBOX_RING_BASE),
> +	RING_TAIL(BLT_RING_BASE),

It occurs to me that we will never use these in anger and so can drop
from the search table.

> +	FORCEWAKE_BLITTER_GEN9,
> +	FORCEWAKE_RENDER_GEN9,
> +	FORCEWAKE_MEDIA_GEN9,

These always use raw accessors (internal to intel_uncore.c).

> +	GEN6_RPNSWREQ,

Always written in conjunction with other registers notable by their
absence from this table

> +	GEN6_RC_VIDEO_FREQ,

Used once per load/resume/reset, whoop-a-do.

> +	/* TODO: Other registers are not yet used */

I hope there are more magic registers that we actually use.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a1f78f275c55..311217e7f917 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -633,6 +633,12 @@  enum forcewake_domains {
 			 FORCEWAKE_MEDIA)
 };
 
+enum forcewake_domains
+intel_reg_read_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg);
+
+enum forcewake_domains
+intel_reg_write_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg);
+
 struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv,
 							enum forcewake_domains domains);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a1db6a02cf23..cac387f38cf6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -418,6 +418,7 @@  static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
 				      struct drm_i915_gem_request *rq1)
 {
 	struct drm_i915_private *dev_priv = rq0->i915;
+	unsigned int fw_domains = rq0->engine->fw_domains_elsp;
 
 	execlists_update_context(rq0);
 
@@ -425,11 +426,11 @@  static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
 		execlists_update_context(rq1);
 
 	spin_lock_irq(&dev_priv->uncore.lock);
-	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
 	execlists_elsp_write(rq0, rq1);
 
-	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
 	spin_unlock_irq(&dev_priv->uncore.lock);
 }
 
@@ -552,7 +553,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 	unsigned int csb_read = 0, i;
 	unsigned int submit_contexts = 0;
 
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get(dev_priv, engine->fw_domains_csb);
 
 	status_pointer = I915_READ_FW(RING_CONTEXT_STATUS_PTR(engine));
 
@@ -577,7 +578,7 @@  static void intel_lrc_irq_handler(unsigned long data)
 		      _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
 				    engine->next_context_status_buffer << 8));
 
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(dev_priv, engine->fw_domains_csb);
 
 	spin_lock(&engine->execlist_lock);
 
@@ -2077,7 +2078,8 @@  logical_ring_default_irqs(struct intel_engine_cs *engine, unsigned shift)
 static int
 logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 {
-	struct intel_context *dctx = to_i915(dev)->kernel_context;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_context *dctx = dev_priv->kernel_context;
 	int ret;
 
 	/* Intentionally left blank. */
@@ -2099,6 +2101,12 @@  logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
 
 	logical_ring_init_platform_invariants(engine);
 
+	engine->fw_domains_elsp =
+		intel_reg_write_fw_domains(dev_priv, RING_ELSP(engine));
+	engine->fw_domains_csb =
+		intel_reg_write_fw_domains(dev_priv,
+					   RING_CONTEXT_STATUS_PTR(engine));
+
 	ret = i915_cmd_parser_init_ring(engine);
 	if (ret)
 		goto error;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 18074ab55f61..fd248cde7164 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -270,6 +270,7 @@  struct  intel_engine_cs {
 	spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
 	struct list_head execlist_queue;
 	struct list_head execlist_retired_req_list;
+	unsigned int fw_domains_elsp, fw_domains_csb;
 	unsigned int next_context_status_buffer;
 	unsigned int idle_lite_restore_wa;
 	bool disable_lite_restore_wa;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index fbc1d215ca67..d0ec28626012 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -551,6 +551,16 @@  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 /* We give fast paths for the really cool registers */
 #define NEEDS_FORCE_WAKE(reg) ((reg) < 0x40000)
 
+#define __gen6_reg_read_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd; \
+	if (NEEDS_FORCE_WAKE(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else \
+		__fwd = 0; \
+	__fwd; \
+})
+
 #define REG_RANGE(reg, start, end) ((reg) >= (start) && (reg) < (end))
 
 #define FORCEWAKE_VLV_RENDER_RANGE_OFFSET(reg) \
@@ -564,6 +574,49 @@  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 	 REG_RANGE((reg), 0x22000, 0x24000) || \
 	 REG_RANGE((reg), 0x30000, 0x40000))
 
+#define __vlv_reg_read_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		__fwd = 0; \
+	else if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_MEDIA; \
+	__fwd; \
+})
+
+static const i915_reg_t gen8_shadowed_regs[] = {
+	FORCEWAKE_MT,
+	GEN6_RPNSWREQ,
+	GEN6_RC_VIDEO_FREQ,
+	RING_TAIL(RENDER_RING_BASE),
+	RING_TAIL(GEN6_BSD_RING_BASE),
+	RING_TAIL(VEBOX_RING_BASE),
+	RING_TAIL(BLT_RING_BASE),
+	/* TODO: Other registers are not yet used */
+};
+
+static bool is_gen8_shadowed(u32 offset)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++)
+		if (offset == gen8_shadowed_regs[i].reg)
+			return true;
+
+	return false;
+}
+
+#define __gen8_reg_write_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd; \
+	if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else \
+		__fwd = 0; \
+	__fwd; \
+})
+
 #define FORCEWAKE_CHV_RENDER_RANGE_OFFSET(reg) \
 	(REG_RANGE((reg), 0x2000, 0x4000) || \
 	 REG_RANGE((reg), 0x5200, 0x8000) || \
@@ -586,6 +639,34 @@  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 	 REG_RANGE((reg), 0x9000, 0xB000) || \
 	 REG_RANGE((reg), 0xF000, 0x10000))
 
+#define __chv_reg_read_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (!NEEDS_FORCE_WAKE(offset)) \
+		__fwd = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	__fwd; \
+})
+
+#define __chv_reg_write_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd = 0; \
+	if (!NEEDS_FORCE_WAKE(offset) || is_gen8_shadowed(offset)) \
+		__fwd = 0; \
+	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	__fwd; \
+})
+
 #define FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg) \
 	REG_RANGE((reg), 0xB00,  0x2000)
 
@@ -618,6 +699,64 @@  void assert_forcewakes_inactive(struct drm_i915_private *dev_priv)
 	 !FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(reg) && \
 	 !FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(reg))
 
+#define SKL_NEEDS_FORCE_WAKE(reg) \
+	((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
+
+#define __gen9_reg_read_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd; \
+	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
+		__fwd = 0; \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	else \
+		__fwd = FORCEWAKE_BLITTER; \
+	__fwd; \
+})
+
+static const i915_reg_t gen9_shadowed_regs[] = {
+	RING_TAIL(RENDER_RING_BASE),
+	RING_TAIL(GEN6_BSD_RING_BASE),
+	RING_TAIL(VEBOX_RING_BASE),
+	RING_TAIL(BLT_RING_BASE),
+	FORCEWAKE_BLITTER_GEN9,
+	FORCEWAKE_RENDER_GEN9,
+	FORCEWAKE_MEDIA_GEN9,
+	GEN6_RPNSWREQ,
+	GEN6_RC_VIDEO_FREQ,
+	/* TODO: Other registers are not yet used */
+};
+
+static bool is_gen9_shadowed(u32 offset)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(gen9_shadowed_regs); i++)
+		if (offset == gen9_shadowed_regs[i].reg)
+			return true;
+
+	return false;
+}
+
+#define __gen9_reg_write_fw_domains(offset) \
+({ \
+	enum forcewake_domains __fwd; \
+	if (!SKL_NEEDS_FORCE_WAKE(offset) || is_gen9_shadowed(offset)) \
+		__fwd = 0; \
+	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER; \
+	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_MEDIA; \
+	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
+		__fwd = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	else \
+		__fwd = FORCEWAKE_BLITTER; \
+	__fwd; \
+})
+
 static void
 ilk_dummy_write(struct drm_i915_private *dev_priv)
 {
@@ -743,9 +882,11 @@  static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 #define __gen6_read(x) \
 static u##x \
 gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
-	if (NEEDS_FORCE_WAKE(offset)) \
-		__force_wake_auto(dev_priv, FORCEWAKE_RENDER); \
+	fw_engine = __gen6_reg_read_fw_domains(offset); \
+	if (fw_engine) \
+		__force_wake_auto(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
@@ -753,14 +894,9 @@  gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 #define __vlv_read(x) \
 static u##x \
 vlv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
-	enum forcewake_domains fw_engine = 0; \
+	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
-	if (!NEEDS_FORCE_WAKE(offset)) \
-		fw_engine = 0; \
-	else if (FORCEWAKE_VLV_RENDER_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_VLV_MEDIA_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_MEDIA; \
+	fw_engine = __vlv_reg_read_fw_domains(offset); \
 	if (fw_engine) \
 		__force_wake_auto(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
@@ -770,40 +906,21 @@  vlv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 #define __chv_read(x) \
 static u##x \
 chv_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
-	enum forcewake_domains fw_engine = 0; \
+	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
-	if (!NEEDS_FORCE_WAKE(offset)) \
-		fw_engine = 0; \
-	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	fw_engine = __chv_reg_read_fw_domains(offset); \
 	if (fw_engine) \
 		__force_wake_auto(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
 	GEN6_READ_FOOTER; \
 }
 
-#define SKL_NEEDS_FORCE_WAKE(reg) \
-	((reg) < 0x40000 && !FORCEWAKE_GEN9_UNCORE_RANGE_OFFSET(reg))
-
 #define __gen9_read(x) \
 static u##x \
 gen9_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_READ_HEADER(x); \
-	if (!SKL_NEEDS_FORCE_WAKE(offset)) \
-		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
-	else \
-		fw_engine = FORCEWAKE_BLITTER; \
+	fw_engine = __gen9_reg_read_fw_domains(offset); \
 	if (fw_engine) \
 		__force_wake_auto(dev_priv, fw_engine); \
 	val = __raw_i915_read##x(dev_priv, reg); \
@@ -942,34 +1059,14 @@  hsw_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool t
 	GEN6_WRITE_FOOTER; \
 }
 
-static const i915_reg_t gen8_shadowed_regs[] = {
-	FORCEWAKE_MT,
-	GEN6_RPNSWREQ,
-	GEN6_RC_VIDEO_FREQ,
-	RING_TAIL(RENDER_RING_BASE),
-	RING_TAIL(GEN6_BSD_RING_BASE),
-	RING_TAIL(VEBOX_RING_BASE),
-	RING_TAIL(BLT_RING_BASE),
-	/* TODO: Other registers are not yet used */
-};
-
-static bool is_gen8_shadowed(struct drm_i915_private *dev_priv,
-			     i915_reg_t reg)
-{
-	int i;
-	for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++)
-		if (i915_mmio_reg_equal(reg, gen8_shadowed_regs[i]))
-			return true;
-
-	return false;
-}
-
 #define __gen8_write(x) \
 static void \
 gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
+	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
-	if (NEEDS_FORCE_WAKE(offset) && !is_gen8_shadowed(dev_priv, reg)) \
-		__force_wake_auto(dev_priv, FORCEWAKE_RENDER); \
+	fw_engine = __gen8_reg_write_fw_domains(offset); \
+	if (fw_engine) \
+		__force_wake_auto(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN6_WRITE_FOOTER; \
 }
@@ -977,64 +1074,22 @@  gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool
 #define __chv_write(x) \
 static void \
 chv_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \
-	enum forcewake_domains fw_engine = 0; \
+	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
-	if (!NEEDS_FORCE_WAKE(offset) || \
-	    is_gen8_shadowed(dev_priv, reg)) \
-		fw_engine = 0; \
-	else if (FORCEWAKE_CHV_RENDER_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_CHV_MEDIA_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_CHV_COMMON_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
+	fw_engine = __chv_reg_write_fw_domains(offset); \
 	if (fw_engine) \
 		__force_wake_auto(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	GEN6_WRITE_FOOTER; \
 }
 
-static const i915_reg_t gen9_shadowed_regs[] = {
-	RING_TAIL(RENDER_RING_BASE),
-	RING_TAIL(GEN6_BSD_RING_BASE),
-	RING_TAIL(VEBOX_RING_BASE),
-	RING_TAIL(BLT_RING_BASE),
-	FORCEWAKE_BLITTER_GEN9,
-	FORCEWAKE_RENDER_GEN9,
-	FORCEWAKE_MEDIA_GEN9,
-	GEN6_RPNSWREQ,
-	GEN6_RC_VIDEO_FREQ,
-	/* TODO: Other registers are not yet used */
-};
-
-static bool is_gen9_shadowed(struct drm_i915_private *dev_priv,
-			     i915_reg_t reg)
-{
-	int i;
-	for (i = 0; i < ARRAY_SIZE(gen9_shadowed_regs); i++)
-		if (i915_mmio_reg_equal(reg, gen9_shadowed_regs[i]))
-			return true;
-
-	return false;
-}
-
 #define __gen9_write(x) \
 static void \
 gen9_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, \
 		bool trace) { \
 	enum forcewake_domains fw_engine; \
 	GEN6_WRITE_HEADER; \
-	if (!SKL_NEEDS_FORCE_WAKE(offset) || \
-	    is_gen9_shadowed(dev_priv, reg)) \
-		fw_engine = 0; \
-	else if (FORCEWAKE_GEN9_RENDER_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER; \
-	else if (FORCEWAKE_GEN9_MEDIA_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_MEDIA; \
-	else if (FORCEWAKE_GEN9_COMMON_RANGE_OFFSET(offset)) \
-		fw_engine = FORCEWAKE_RENDER | FORCEWAKE_MEDIA; \
-	else \
-		fw_engine = FORCEWAKE_BLITTER; \
+	fw_engine = __gen9_reg_write_fw_domains(offset); \
 	if (fw_engine) \
 		__force_wake_auto(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
@@ -1715,3 +1770,54 @@  intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv)
 
 	return false;
 }
+
+enum forcewake_domains
+intel_reg_read_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg)
+{
+	if (intel_vgpu_active(dev_priv->dev))
+		return 0;
+
+	switch (INTEL_INFO(dev_priv)->gen) {
+	case 9:
+		return __gen9_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+	case 8:
+		if (IS_CHERRYVIEW(dev_priv))
+			return __chv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		else
+			return __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+	case 7:
+	case 6:
+		if (IS_VALLEYVIEW(dev_priv))
+			return __vlv_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+		else
+			return __gen6_reg_read_fw_domains(i915_mmio_reg_offset(reg));
+	default:
+		/* It is a bug to think about forcewake on older platforms. */
+		MISSING_CASE(INTEL_INFO(dev_priv)->gen);
+		return 0;
+	}
+}
+
+enum forcewake_domains
+intel_reg_write_fw_domains(struct drm_i915_private *dev_priv, i915_reg_t reg)
+{
+	if (intel_vgpu_active(dev_priv->dev))
+		return 0;
+
+	switch (INTEL_INFO(dev_priv)->gen) {
+	case 9:
+		return __gen9_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+	case 8:
+		if (IS_CHERRYVIEW(dev_priv))
+			return __chv_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+		else
+			return __gen8_reg_write_fw_domains(i915_mmio_reg_offset(reg));
+	case 7:
+	case 6:
+		return 0;
+	default:
+		/* It is a bug to think about forcewake on older platforms. */
+		MISSING_CASE(INTEL_INFO(dev_priv)->gen);
+		return 0;
+	}
+}