diff mbox

[v4] drm/i915/bxt: Broxton decoupled MMIO

Message ID 1479192015-4247-1-git-send-email-praveen.paneri@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Praveen Paneri Nov. 15, 2016, 6:40 a.m. UTC
Decoupled MMIO is an alternative way to access forcewake domain
registers, which requires less cycles for a single read/write and
avoids frequent software forcewake.
This certainly gives advantage over the forcewake as this new
mechanism “decouples” CPU cycles and allow them to complete even
when GT is in a CPD (frequency change) or C6 state.

This can co-exist with forcewake and we will continue to use forcewake
as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
separately and land into funny situations.

v2:
- Moved platform check out of the function and got rid of duplicate
 functions to find out decoupled power domain (Chris)
- Added a check for forcewake already held and skipped decoupled
 access (Chris)
- Skipped writing 64 bit registers through decoupled MMIO (Chris)

v3:
- Improved commit message with more info on decoupled mmio (Tvrtko)
- Changed decoupled operation to enum and used u32 instead of
 uint_32 data type for register offset (Tvrtko)
- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
- Improved __gen9_decoupled_read and __gen9_decoupled_write
 routines (Tvrtko)

v4:
- Fixed alignment and variable names (Chris)
- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)

Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
 drivers/gpu/drm/i915/i915_pci.c     |   1 +
 drivers/gpu/drm/i915/i915_reg.h     |   7 +++
 drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
 4 files changed, 134 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin Nov. 15, 2016, 9:36 a.m. UTC | #1
On 15/11/2016 06:40, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
>  functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
>  access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
>  uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>  routines (Tvrtko)
>
> v4:
> - Fixed alignment and variable names (Chris)
> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 134 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4e7148a..158f05c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -549,6 +549,18 @@ enum forcewake_domains {
>  #define FW_REG_READ  (1)
>  #define FW_REG_WRITE (2)
>
> +enum decoupled_power_domain {
> +	GEN9_DECOUPLED_PD_BLITTER = 0,
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> +	GEN9_DECOUPLED_OP_WRITE = 0,
> +	GEN9_DECOUPLED_OP_READ
> +};
> +
>  enum forcewake_domains
>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>  			       i915_reg_t reg, unsigned int op);
> @@ -683,7 +695,8 @@ struct intel_csr {
>  	func(cursor_needs_physical); \
>  	func(hws_needs_physical); \
>  	func(overlay_needs_physical); \
> -	func(supports_tv)
> +	func(supports_tv); \
> +	func(has_decoupled_mmio)
>
>  struct sseu_dev_info {
>  	u8 slice_mask;
> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>  #define GT_FREQUENCY_MULTIPLIER 50
>  #define GEN9_FREQ_SCALER 3
>
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> +

Looks like I've missed this before, but Would you mind renaming the dev 
argument to dev_priv?

>  #include "i915_trace.h"
>
>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 70a99ce..fce8e19 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -363,6 +363,7 @@
>  	.has_hw_contexts = 1,
>  	.has_logical_ring_contexts = 1,
>  	.has_guc = 1,
> +	.has_decoupled_mmio = 1,
>  	.ddb_size = 512,
>  	GEN_DEFAULT_PIPEOFFSETS,
>  	IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3361d7f..c70c07a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7342,6 +7342,13 @@ enum {
>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO			(1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT			28
> +#define GEN9_DECOUPLED_OP_SHIFT			24
> +
>  /* Per-pipe DDI Function Control */
>  #define _TRANS_DDI_FUNC_CTL_A		0x60400
>  #define _TRANS_DDI_FUNC_CTL_B		0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..e4c50cb 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>  	__unclaimed_reg_debug(dev_priv, reg, read, before);
>  }
>
> +static const enum decoupled_power_domain fw2dpd_domain[] = {
> +	GEN9_DECOUPLED_PD_RENDER,
> +	GEN9_DECOUPLED_PD_BLITTER,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_MEDIA,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL,
> +	GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> +					 u32 reg,
> +					 enum forcewake_domains fw_domain,
> +					 enum decoupled_ops operation)
> +{
> +	enum decoupled_power_domain dp_domain;
> +	u32 ctrl_reg_data = 0;
> +
> +	dp_domain = fw2dpd_domain[fw_domain - 1];
> +
> +	ctrl_reg_data |= reg;
> +	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> +	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
> +	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> +	if (wait_for_atomic((__raw_i915_read32(dev_priv,
> +			    GEN9_DECOUPLED_REG0_DW1) &
> +			    GEN9_DECOUPLED_DW1_GO) == 0,
> +			    FORCEWAKE_ACK_TIMEOUT_MS))
> +		DRM_ERROR("Decoupled MMIO wait timed out\n");
> +}
> +
> +static inline u32
> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
> +			     u32 reg,
> +			     enum forcewake_domains fw_domain)
> +{
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_READ);
> +
> +	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
> +}
> +
> +static inline void
> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
> +			    u32 reg, u32 data,
> +			    enum forcewake_domains fw_domain)
> +{
> +
> +	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
> +
> +	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
> +				     GEN9_DECOUPLED_OP_WRITE);
> +}
> +
> +
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_rpm_wakelock_held(dev_priv);
> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_READ_FOOTER; \
>  }
>
> +#define __gen9_decoupled_read(x) \
> +static u##x \
> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
> +		       i915_reg_t reg, bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_READ_HEADER(x); \
> +	fw_engine = __fwtable_reg_read_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
> +		unsigned i; \
> +		u32 *ptr_data = (u32 *) &val; \
> +		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
> +			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
> +								 offset, \
> +								 fw_engine); \
> +	} else { \
> +		val = __raw_i915_read##x(dev_priv, reg); \
> +	} \
> +	GEN6_READ_FOOTER; \
> +}
> +
> +__gen9_decoupled_read(32)
> +__gen9_decoupled_read(64)
>  __fwtable_read(8)
>  __fwtable_read(16)
>  __fwtable_read(32)
> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
>  	GEN6_WRITE_FOOTER; \
>  }
>
> +#define __gen9_decoupled_write(x) \
> +static void \
> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
> +			i915_reg_t reg, u##x val, \
> +		bool trace) { \
> +	enum forcewake_domains fw_engine; \
> +	GEN6_WRITE_HEADER; \
> +	fw_engine = __fwtable_reg_write_fw_domains(offset); \
> +	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
> +		__gen9_decoupled_mmio_write(dev_priv, \
> +					    offset, \
> +					    val, \
> +					    fw_engine); \
> +	else \
> +		__raw_i915_write##x(dev_priv, reg, val); \
> +	GEN6_WRITE_FOOTER; \
> +}
> +
> +__gen9_decoupled_write(32)
>  __fwtable_write(8)
>  __fwtable_write(16)
>  __fwtable_write(32)
> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv)
>  		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>  		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>  		ASSIGN_READ_MMIO_VFUNCS(fwtable);
> +		if (HAS_DECOUPLED_MMIO(dev_priv)) {
> +			dev_priv->uncore.funcs.mmio_readl =
> +						gen9_decoupled_read32;
> +			dev_priv->uncore.funcs.mmio_readq =
> +						gen9_decoupled_read64;
> +			dev_priv->uncore.funcs.mmio_writel =
> +						gen9_decoupled_write32;
> +		}
>  		break;
>  	case 8:
>  		if (IS_CHERRYVIEW(dev_priv)) {
>

With the rename fix above:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson Nov. 15, 2016, 10:07 a.m. UTC | #2
On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
> 
> On 15/11/2016 06:40, Praveen Paneri wrote:
> >Decoupled MMIO is an alternative way to access forcewake domain
> >registers, which requires less cycles for a single read/write and
> >avoids frequent software forcewake.
> >This certainly gives advantage over the forcewake as this new
> >mechanism “decouples” CPU cycles and allow them to complete even
> >when GT is in a CPD (frequency change) or C6 state.
> >
> >This can co-exist with forcewake and we will continue to use forcewake
> >as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> >separately and land into funny situations.
> >
> >v2:
> >- Moved platform check out of the function and got rid of duplicate
> > functions to find out decoupled power domain (Chris)
> >- Added a check for forcewake already held and skipped decoupled
> > access (Chris)
> >- Skipped writing 64 bit registers through decoupled MMIO (Chris)
> >
> >v3:
> >- Improved commit message with more info on decoupled mmio (Tvrtko)
> >- Changed decoupled operation to enum and used u32 instead of
> > uint_32 data type for register offset (Tvrtko)
> >- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> >- Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> >- Improved __gen9_decoupled_read and __gen9_decoupled_write
> > routines (Tvrtko)
> >
> >v4:
> >- Fixed alignment and variable names (Chris)
> >- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
> >
> >Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> >Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
> > drivers/gpu/drm/i915/i915_pci.c     |   1 +
> > drivers/gpu/drm/i915/i915_reg.h     |   7 +++
> > drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
> > 4 files changed, 134 insertions(+), 1 deletion(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index 4e7148a..158f05c 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -549,6 +549,18 @@ enum forcewake_domains {
> > #define FW_REG_READ  (1)
> > #define FW_REG_WRITE (2)
> >
> >+enum decoupled_power_domain {
> >+	GEN9_DECOUPLED_PD_BLITTER = 0,
> >+	GEN9_DECOUPLED_PD_RENDER,
> >+	GEN9_DECOUPLED_PD_MEDIA,
> >+	GEN9_DECOUPLED_PD_ALL
> >+};
> >+
> >+enum decoupled_ops {
> >+	GEN9_DECOUPLED_OP_WRITE = 0,
> >+	GEN9_DECOUPLED_OP_READ
> >+};
> >+
> > enum forcewake_domains
> > intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> > 			       i915_reg_t reg, unsigned int op);
> >@@ -683,7 +695,8 @@ struct intel_csr {
> > 	func(cursor_needs_physical); \
> > 	func(hws_needs_physical); \
> > 	func(overlay_needs_physical); \
> >-	func(supports_tv)
> >+	func(supports_tv); \
> >+	func(has_decoupled_mmio)
> >
> > struct sseu_dev_info {
> > 	u8 slice_mask;
> >@@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
> > #define GT_FREQUENCY_MULTIPLIER 50
> > #define GEN9_FREQ_SCALER 3
> >
> >+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> >+		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> >+
> 
> Looks like I've missed this before, but Would you mind renaming the
> dev argument to dev_priv?

And whilst you are there, fix up the code to set
info->has_decoupled_mmio = false when santizing.
-Chris
Praveen Paneri Nov. 15, 2016, 10:56 a.m. UTC | #3
Hi Tvrtko,

On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote:
>
> On 15/11/2016 06:40, Praveen Paneri wrote:
>> Decoupled MMIO is an alternative way to access forcewake domain
>> registers, which requires less cycles for a single read/write and
>> avoids frequent software forcewake.
>> This certainly gives advantage over the forcewake as this new
>> mechanism “decouples” CPU cycles and allow them to complete even
>> when GT is in a CPD (frequency change) or C6 state.
>>
>> This can co-exist with forcewake and we will continue to use forcewake
>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>> separately and land into funny situations.
>>
>> v2:
>> - Moved platform check out of the function and got rid of duplicate
>>  functions to find out decoupled power domain (Chris)
>> - Added a check for forcewake already held and skipped decoupled
>>  access (Chris)
>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>
>> v3:
>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>> - Changed decoupled operation to enum and used u32 instead of
>>  uint_32 data type for register offset (Tvrtko)
>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>  routines (Tvrtko)
>>
>> v4:
>> - Fixed alignment and variable names (Chris)
>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>
>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>  drivers/gpu/drm/i915/intel_uncore.c | 109
>> ++++++++++++++++++++++++++++++++++++
>>  4 files changed, 134 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 4e7148a..158f05c 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>  #define FW_REG_READ  (1)
>>  #define FW_REG_WRITE (2)
>>
>> +enum decoupled_power_domain {
>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +enum decoupled_ops {
>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>> +    GEN9_DECOUPLED_OP_READ
>> +};
>> +
>>  enum forcewake_domains
>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>                     i915_reg_t reg, unsigned int op);
>> @@ -683,7 +695,8 @@ struct intel_csr {
>>      func(cursor_needs_physical); \
>>      func(hws_needs_physical); \
>>      func(overlay_needs_physical); \
>> -    func(supports_tv)
>> +    func(supports_tv); \
>> +    func(has_decoupled_mmio)
>>
>>  struct sseu_dev_info {
>>      u8 slice_mask;
>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>  #define GT_FREQUENCY_MULTIPLIER 50
>>  #define GEN9_FREQ_SCALER 3
>>
>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>> +
>
> Looks like I've missed this before, but Would you mind renaming the dev
> argument to dev_priv?
>
>>  #include "i915_trace.h"
>>
>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>> *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>> b/drivers/gpu/drm/i915/i915_pci.c
>> index 70a99ce..fce8e19 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -363,6 +363,7 @@
>>      .has_hw_contexts = 1,
>>      .has_logical_ring_contexts = 1,
>>      .has_guc = 1,
>> +    .has_decoupled_mmio = 1,
>>      .ddb_size = 512,
>>      GEN_DEFAULT_PIPEOFFSETS,
>>      IVB_CURSOR_OFFSETS,
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index 3361d7f..c70c07a 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -7342,6 +7342,13 @@ enum {
>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>
>> +/* Decoupled MMIO register pair for kernel driver */
>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>> +
>>  /* Per-pipe DDI Function Control */
>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>> b/drivers/gpu/drm/i915/intel_uncore.c
>> index e2b188d..e4c50cb 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>  }
>>
>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>> +    GEN9_DECOUPLED_PD_RENDER,
>> +    GEN9_DECOUPLED_PD_BLITTER,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_MEDIA,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL,
>> +    GEN9_DECOUPLED_PD_ALL
>> +};
>> +
>> +/*
>> + * Decoupled MMIO access for only 1 DWORD
>> + */
>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>> *dev_priv,
>> +                     u32 reg,
>> +                     enum forcewake_domains fw_domain,
>> +                     enum decoupled_ops operation)
>> +{
>> +    enum decoupled_power_domain dp_domain;
>> +    u32 ctrl_reg_data = 0;
>> +
>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>> +
>> +    ctrl_reg_data |= reg;
>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>> ctrl_reg_data);
>> +
>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>> +                GEN9_DECOUPLED_REG0_DW1) &
>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>> +}
>> +
>> +static inline u32
>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>> +                 u32 reg,
>> +                 enum forcewake_domains fw_domain)
>> +{
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_READ);
>> +
>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>> +}
>> +
>> +static inline void
>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>> +                u32 reg, u32 data,
>> +                enum forcewake_domains fw_domain)
>> +{
>> +
>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>> +
>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>> +                     GEN9_DECOUPLED_OP_WRITE);
>> +}
>> +
>> +
>>  #define GEN2_READ_HEADER(x) \
>>      u##x val = 0; \
>>      assert_rpm_wakelock_held(dev_priv);
>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_READ_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_read(x) \
>> +static u##x \
>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>> +               i915_reg_t reg, bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_READ_HEADER(x); \
>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>> +        unsigned i; \
>> +        u32 *ptr_data = (u32 *) &val; \
>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>> +                                 offset, \
>> +                                 fw_engine); \
>> +    } else { \
>> +        val = __raw_i915_read##x(dev_priv, reg); \
>> +    } \
>> +    GEN6_READ_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_read(32)
>> +__gen9_decoupled_read(64)
>>  __fwtable_read(8)
>>  __fwtable_read(16)
>>  __fwtable_read(32)
>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct
>> drm_i915_private *dev_priv,
>>      GEN6_WRITE_FOOTER; \
>>  }
>>
>> +#define __gen9_decoupled_write(x) \
>> +static void \
>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>> +            i915_reg_t reg, u##x val, \
>> +        bool trace) { \
>> +    enum forcewake_domains fw_engine; \
>> +    GEN6_WRITE_HEADER; \
>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>> +        __gen9_decoupled_mmio_write(dev_priv, \
>> +                        offset, \
>> +                        val, \
>> +                        fw_engine); \
>> +    else \
>> +        __raw_i915_write##x(dev_priv, reg, val); \
>> +    GEN6_WRITE_FOOTER; \
>> +}
>> +
>> +__gen9_decoupled_write(32)
>>  __fwtable_write(8)
>>  __fwtable_write(16)
>>  __fwtable_write(32)
>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private
>> *dev_priv)
>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>> +            dev_priv->uncore.funcs.mmio_readl =
>> +                        gen9_decoupled_read32;
>> +            dev_priv->uncore.funcs.mmio_readq =
>> +                        gen9_decoupled_read64;
>> +            dev_priv->uncore.funcs.mmio_writel =
>> +                        gen9_decoupled_write32;
>> +        }
>>          break;
>>      case 8:
>>          if (IS_CHERRYVIEW(dev_priv)) {
>>
>
> With the rename fix above:
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Thank you for your review. Will fix the name and re send.
>
> Regards,
>
> Tvrtko
Tvrtko Ursulin Nov. 15, 2016, 10:59 a.m. UTC | #4
On 15/11/2016 10:56, Praveen Paneri wrote:
> Hi Tvrtko,
>
> On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>>  functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>>  access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>>  uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>  routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>>  drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>  drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>  drivers/gpu/drm/i915/intel_uncore.c | 109
>>> ++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 134 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..158f05c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>  #define FW_REG_READ  (1)
>>>  #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>> +    GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>>  enum forcewake_domains
>>>  intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>                     i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>      func(cursor_needs_physical); \
>>>      func(hws_needs_physical); \
>>>      func(overlay_needs_physical); \
>>> -    func(supports_tv)
>>> +    func(supports_tv); \
>>> +    func(has_decoupled_mmio)
>>>
>>>  struct sseu_dev_info {
>>>      u8 slice_mask;
>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>>  #define GT_FREQUENCY_MULTIPLIER 50
>>>  #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>> +
>>
>> Looks like I've missed this before, but Would you mind renaming the dev
>> argument to dev_priv?
>>
>>>  #include "i915_trace.h"
>>>
>>>  static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private
>>> *dev_priv)
>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 70a99ce..fce8e19 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -363,6 +363,7 @@
>>>      .has_hw_contexts = 1,
>>>      .has_logical_ring_contexts = 1,
>>>      .has_guc = 1,
>>> +    .has_decoupled_mmio = 1,
>>>      .ddb_size = 512,
>>>      GEN_DEFAULT_PIPEOFFSETS,
>>>      IVB_CURSOR_OFFSETS,
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index 3361d7f..c70c07a 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -7342,6 +7342,13 @@ enum {
>>>  #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
>>>  #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
>>>
>>> +/* Decoupled MMIO register pair for kernel driver */
>>> +#define GEN9_DECOUPLED_REG0_DW0            _MMIO(0xF00)
>>> +#define GEN9_DECOUPLED_REG0_DW1            _MMIO(0xF04)
>>> +#define GEN9_DECOUPLED_DW1_GO            (1<<31)
>>> +#define GEN9_DECOUPLED_PD_SHIFT            28
>>> +#define GEN9_DECOUPLED_OP_SHIFT            24
>>> +
>>>  /* Per-pipe DDI Function Control */
>>>  #define _TRANS_DDI_FUNC_CTL_A        0x60400
>>>  #define _TRANS_DDI_FUNC_CTL_B        0x61400
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> b/drivers/gpu/drm/i915/intel_uncore.c
>>> index e2b188d..e4c50cb 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset)
>>>      __unclaimed_reg_debug(dev_priv, reg, read, before);
>>>  }
>>>
>>> +static const enum decoupled_power_domain fw2dpd_domain[] = {
>>> +    GEN9_DECOUPLED_PD_RENDER,
>>> +    GEN9_DECOUPLED_PD_BLITTER,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL,
>>> +    GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +/*
>>> + * Decoupled MMIO access for only 1 DWORD
>>> + */
>>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private
>>> *dev_priv,
>>> +                     u32 reg,
>>> +                     enum forcewake_domains fw_domain,
>>> +                     enum decoupled_ops operation)
>>> +{
>>> +    enum decoupled_power_domain dp_domain;
>>> +    u32 ctrl_reg_data = 0;
>>> +
>>> +    dp_domain = fw2dpd_domain[fw_domain - 1];
>>> +
>>> +    ctrl_reg_data |= reg;
>>> +    ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
>>> +    ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
>>> +    ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1,
>>> ctrl_reg_data);
>>> +
>>> +    if (wait_for_atomic((__raw_i915_read32(dev_priv,
>>> +                GEN9_DECOUPLED_REG0_DW1) &
>>> +                GEN9_DECOUPLED_DW1_GO) == 0,
>>> +                FORCEWAKE_ACK_TIMEOUT_MS))
>>> +        DRM_ERROR("Decoupled MMIO wait timed out\n");
>>> +}
>>> +
>>> +static inline u32
>>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
>>> +                 u32 reg,
>>> +                 enum forcewake_domains fw_domain)
>>> +{
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_READ);
>>> +
>>> +    return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
>>> +}
>>> +
>>> +static inline void
>>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
>>> +                u32 reg, u32 data,
>>> +                enum forcewake_domains fw_domain)
>>> +{
>>> +
>>> +    __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
>>> +
>>> +    __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
>>> +                     GEN9_DECOUPLED_OP_WRITE);
>>> +}
>>> +
>>> +
>>>  #define GEN2_READ_HEADER(x) \
>>>      u##x val = 0; \
>>>      assert_rpm_wakelock_held(dev_priv);
>>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_READ_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_read(x) \
>>> +static u##x \
>>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
>>> +               i915_reg_t reg, bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_READ_HEADER(x); \
>>> +    fw_engine = __fwtable_reg_read_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
>>> +        unsigned i; \
>>> +        u32 *ptr_data = (u32 *) &val; \
>>> +        for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
>>> +            *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
>>> +                                 offset, \
>>> +                                 fw_engine); \
>>> +    } else { \
>>> +        val = __raw_i915_read##x(dev_priv, reg); \
>>> +    } \
>>> +    GEN6_READ_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_read(32)
>>> +__gen9_decoupled_read(64)
>>>  __fwtable_read(8)
>>>  __fwtable_read(16)
>>>  __fwtable_read(32)
>>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct
>>> drm_i915_private *dev_priv,
>>>      GEN6_WRITE_FOOTER; \
>>>  }
>>>
>>> +#define __gen9_decoupled_write(x) \
>>> +static void \
>>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
>>> +            i915_reg_t reg, u##x val, \
>>> +        bool trace) { \
>>> +    enum forcewake_domains fw_engine; \
>>> +    GEN6_WRITE_HEADER; \
>>> +    fw_engine = __fwtable_reg_write_fw_domains(offset); \
>>> +    if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
>>> +        __gen9_decoupled_mmio_write(dev_priv, \
>>> +                        offset, \
>>> +                        val, \
>>> +                        fw_engine); \
>>> +    else \
>>> +        __raw_i915_write##x(dev_priv, reg, val); \
>>> +    GEN6_WRITE_FOOTER; \
>>> +}
>>> +
>>> +__gen9_decoupled_write(32)
>>>  __fwtable_write(8)
>>>  __fwtable_write(16)
>>>  __fwtable_write(32)
>>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private
>>> *dev_priv)
>>>          ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
>>>          ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
>>>          ASSIGN_READ_MMIO_VFUNCS(fwtable);
>>> +        if (HAS_DECOUPLED_MMIO(dev_priv)) {
>>> +            dev_priv->uncore.funcs.mmio_readl =
>>> +                        gen9_decoupled_read32;
>>> +            dev_priv->uncore.funcs.mmio_readq =
>>> +                        gen9_decoupled_read64;
>>> +            dev_priv->uncore.funcs.mmio_writel =
>>> +                        gen9_decoupled_write32;
>>> +        }
>>>          break;
>>>      case 8:
>>>          if (IS_CHERRYVIEW(dev_priv)) {
>>>
>>
>> With the rename fix above:
>>
>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Thank you for your review. Will fix the name and re send.

Chris also made a good suggestion since the HAS_DECOUPLED_MMIO is 
currently different from info->has_decoupled_mmio which is confusing.

I think his idea was to edit info->has_decoupled_mmio in 
intel_device_runtime_init using the stepping data and remove the same 
from the macro.

Regards,

Tvrtko
Praveen Paneri Nov. 15, 2016, 1:17 p.m. UTC | #5
Hi Chris,

On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote:
> On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
>>
>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>> Decoupled MMIO is an alternative way to access forcewake domain
>>> registers, which requires less cycles for a single read/write and
>>> avoids frequent software forcewake.
>>> This certainly gives advantage over the forcewake as this new
>>> mechanism “decouples” CPU cycles and allow them to complete even
>>> when GT is in a CPD (frequency change) or C6 state.
>>>
>>> This can co-exist with forcewake and we will continue to use forcewake
>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>> separately and land into funny situations.
>>>
>>> v2:
>>> - Moved platform check out of the function and got rid of duplicate
>>> functions to find out decoupled power domain (Chris)
>>> - Added a check for forcewake already held and skipped decoupled
>>> access (Chris)
>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>
>>> v3:
>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>> - Changed decoupled operation to enum and used u32 instead of
>>> uint_32 data type for register offset (Tvrtko)
>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>> routines (Tvrtko)
>>>
>>> v4:
>>> - Fixed alignment and variable names (Chris)
>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>
>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>> drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>> drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>> drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++
>>> 4 files changed, 134 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 4e7148a..158f05c 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>> #define FW_REG_READ  (1)
>>> #define FW_REG_WRITE (2)
>>>
>>> +enum decoupled_power_domain {
>>> +	GEN9_DECOUPLED_PD_BLITTER = 0,
>>> +	GEN9_DECOUPLED_PD_RENDER,
>>> +	GEN9_DECOUPLED_PD_MEDIA,
>>> +	GEN9_DECOUPLED_PD_ALL
>>> +};
>>> +
>>> +enum decoupled_ops {
>>> +	GEN9_DECOUPLED_OP_WRITE = 0,
>>> +	GEN9_DECOUPLED_OP_READ
>>> +};
>>> +
>>> enum forcewake_domains
>>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>> 			       i915_reg_t reg, unsigned int op);
>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>> 	func(cursor_needs_physical); \
>>> 	func(hws_needs_physical); \
>>> 	func(overlay_needs_physical); \
>>> -	func(supports_tv)
>>> +	func(supports_tv); \
>>> +	func(has_decoupled_mmio)
>>>
>>> struct sseu_dev_info {
>>> 	u8 slice_mask;
>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>> #define GT_FREQUENCY_MULTIPLIER 50
>>> #define GEN9_FREQ_SCALER 3
>>>
>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>> +		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>> +
>>
>> Looks like I've missed this before, but Would you mind renaming the
>> dev argument to dev_priv?
>
> And whilst you are there, fix up the code to set
> info->has_decoupled_mmio = false when santizing.
> -Chris
but dev_priv->info is declared as const.

Praveen
>
Tvrtko Ursulin Nov. 15, 2016, 2:44 p.m. UTC | #6
On 15/11/2016 13:17, Praveen Paneri wrote:
> Hi Chris,
>
> On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote:
>> On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote:
>>>
>>> On 15/11/2016 06:40, Praveen Paneri wrote:
>>>> Decoupled MMIO is an alternative way to access forcewake domain
>>>> registers, which requires less cycles for a single read/write and
>>>> avoids frequent software forcewake.
>>>> This certainly gives advantage over the forcewake as this new
>>>> mechanism “decouples” CPU cycles and allow them to complete even
>>>> when GT is in a CPD (frequency change) or C6 state.
>>>>
>>>> This can co-exist with forcewake and we will continue to use forcewake
>>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
>>>> separately and land into funny situations.
>>>>
>>>> v2:
>>>> - Moved platform check out of the function and got rid of duplicate
>>>> functions to find out decoupled power domain (Chris)
>>>> - Added a check for forcewake already held and skipped decoupled
>>>> access (Chris)
>>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>>>>
>>>> v3:
>>>> - Improved commit message with more info on decoupled mmio (Tvrtko)
>>>> - Changed decoupled operation to enum and used u32 instead of
>>>> uint_32 data type for register offset (Tvrtko)
>>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
>>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
>>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write
>>>> routines (Tvrtko)
>>>>
>>>> v4:
>>>> - Fixed alignment and variable names (Chris)
>>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang)
>>>>
>>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
>>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
>>>> ---
>>>> drivers/gpu/drm/i915/i915_drv.h     |  18 +++++-
>>>> drivers/gpu/drm/i915/i915_pci.c     |   1 +
>>>> drivers/gpu/drm/i915/i915_reg.h     |   7 +++
>>>> drivers/gpu/drm/i915/intel_uncore.c | 109
>>>> ++++++++++++++++++++++++++++++++++++
>>>> 4 files changed, 134 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 4e7148a..158f05c 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -549,6 +549,18 @@ enum forcewake_domains {
>>>> #define FW_REG_READ  (1)
>>>> #define FW_REG_WRITE (2)
>>>>
>>>> +enum decoupled_power_domain {
>>>> +    GEN9_DECOUPLED_PD_BLITTER = 0,
>>>> +    GEN9_DECOUPLED_PD_RENDER,
>>>> +    GEN9_DECOUPLED_PD_MEDIA,
>>>> +    GEN9_DECOUPLED_PD_ALL
>>>> +};
>>>> +
>>>> +enum decoupled_ops {
>>>> +    GEN9_DECOUPLED_OP_WRITE = 0,
>>>> +    GEN9_DECOUPLED_OP_READ
>>>> +};
>>>> +
>>>> enum forcewake_domains
>>>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
>>>>                    i915_reg_t reg, unsigned int op);
>>>> @@ -683,7 +695,8 @@ struct intel_csr {
>>>>     func(cursor_needs_physical); \
>>>>     func(hws_needs_physical); \
>>>>     func(overlay_needs_physical); \
>>>> -    func(supports_tv)
>>>> +    func(supports_tv); \
>>>> +    func(has_decoupled_mmio)
>>>>
>>>> struct sseu_dev_info {
>>>>     u8 slice_mask;
>>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table {
>>>> #define GT_FREQUENCY_MULTIPLIER 50
>>>> #define GEN9_FREQ_SCALER 3
>>>>
>>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
>>>> +        && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
>>>> +
>>>
>>> Looks like I've missed this before, but Would you mind renaming the
>>> dev argument to dev_priv?
>>
>> And whilst you are there, fix up the code to set
>> info->has_decoupled_mmio = false when santizing.
>> -Chris
> but dev_priv->info is declared as const.

Look in intel_device_info_runtime_init for how it is currently done 
(mkwrite_device_info).

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4e7148a..158f05c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -549,6 +549,18 @@  enum forcewake_domains {
 #define FW_REG_READ  (1)
 #define FW_REG_WRITE (2)
 
+enum decoupled_power_domain {
+	GEN9_DECOUPLED_PD_BLITTER = 0,
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+enum decoupled_ops {
+	GEN9_DECOUPLED_OP_WRITE = 0,
+	GEN9_DECOUPLED_OP_READ
+};
+
 enum forcewake_domains
 intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 			       i915_reg_t reg, unsigned int op);
@@ -683,7 +695,8 @@  struct intel_csr {
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
-	func(supports_tv)
+	func(supports_tv); \
+	func(has_decoupled_mmio)
 
 struct sseu_dev_info {
 	u8 slice_mask;
@@ -2652,6 +2665,9 @@  struct drm_i915_cmd_table {
 #define GT_FREQUENCY_MULTIPLIER 50
 #define GEN9_FREQ_SCALER 3
 
+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
+		&& IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
+
 #include "i915_trace.h"
 
 static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 70a99ce..fce8e19 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -363,6 +363,7 @@ 
 	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_guc = 1,
+	.has_decoupled_mmio = 1,
 	.ddb_size = 512,
 	GEN_DEFAULT_PIPEOFFSETS,
 	IVB_CURSOR_OFFSETS,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3361d7f..c70c07a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7342,6 +7342,13 @@  enum {
 #define  SKL_FUSE_PG1_DIST_STATUS              (1<<26)
 #define  SKL_FUSE_PG2_DIST_STATUS              (1<<25)
 
+/* Decoupled MMIO register pair for kernel driver */
+#define GEN9_DECOUPLED_REG0_DW0			_MMIO(0xF00)
+#define GEN9_DECOUPLED_REG0_DW1			_MMIO(0xF04)
+#define GEN9_DECOUPLED_DW1_GO			(1<<31)
+#define GEN9_DECOUPLED_PD_SHIFT			28
+#define GEN9_DECOUPLED_OP_SHIFT			24
+
 /* Per-pipe DDI Function Control */
 #define _TRANS_DDI_FUNC_CTL_A		0x60400
 #define _TRANS_DDI_FUNC_CTL_B		0x61400
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index e2b188d..e4c50cb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -831,6 +831,66 @@  static bool is_gen8_shadowed(u32 offset)
 	__unclaimed_reg_debug(dev_priv, reg, read, before);
 }
 
+static const enum decoupled_power_domain fw2dpd_domain[] = {
+	GEN9_DECOUPLED_PD_RENDER,
+	GEN9_DECOUPLED_PD_BLITTER,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_MEDIA,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL,
+	GEN9_DECOUPLED_PD_ALL
+};
+
+/*
+ * Decoupled MMIO access for only 1 DWORD
+ */
+static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
+					 u32 reg,
+					 enum forcewake_domains fw_domain,
+					 enum decoupled_ops operation)
+{
+	enum decoupled_power_domain dp_domain;
+	u32 ctrl_reg_data = 0;
+
+	dp_domain = fw2dpd_domain[fw_domain - 1];
+
+	ctrl_reg_data |= reg;
+	ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
+	ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT);
+	ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv,
+			    GEN9_DECOUPLED_REG0_DW1) &
+			    GEN9_DECOUPLED_DW1_GO) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Decoupled MMIO wait timed out\n");
+}
+
+static inline u32
+__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv,
+			     u32 reg,
+			     enum forcewake_domains fw_domain)
+{
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_READ);
+
+	return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0);
+}
+
+static inline void
+__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv,
+			    u32 reg, u32 data,
+			    enum forcewake_domains fw_domain)
+{
+
+	__raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data);
+
+	__gen9_decoupled_mmio_access(dev_priv, reg, fw_domain,
+				     GEN9_DECOUPLED_OP_WRITE);
+}
+
+
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_rpm_wakelock_held(dev_priv);
@@ -935,6 +995,28 @@  static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_READ_FOOTER; \
 }
 
+#define __gen9_decoupled_read(x) \
+static u##x \
+gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \
+		       i915_reg_t reg, bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_READ_HEADER(x); \
+	fw_engine = __fwtable_reg_read_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \
+		unsigned i; \
+		u32 *ptr_data = (u32 *) &val; \
+		for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \
+			*ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \
+								 offset, \
+								 fw_engine); \
+	} else { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	GEN6_READ_FOOTER; \
+}
+
+__gen9_decoupled_read(32)
+__gen9_decoupled_read(64)
 __fwtable_read(8)
 __fwtable_read(16)
 __fwtable_read(32)
@@ -1064,6 +1146,25 @@  static inline void __force_wake_auto(struct drm_i915_private *dev_priv,
 	GEN6_WRITE_FOOTER; \
 }
 
+#define __gen9_decoupled_write(x) \
+static void \
+gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \
+			i915_reg_t reg, u##x val, \
+		bool trace) { \
+	enum forcewake_domains fw_engine; \
+	GEN6_WRITE_HEADER; \
+	fw_engine = __fwtable_reg_write_fw_domains(offset); \
+	if (fw_engine & ~dev_priv->uncore.fw_domains_active) \
+		__gen9_decoupled_mmio_write(dev_priv, \
+					    offset, \
+					    val, \
+					    fw_engine); \
+	else \
+		__raw_i915_write##x(dev_priv, reg, val); \
+	GEN6_WRITE_FOOTER; \
+}
+
+__gen9_decoupled_write(32)
 __fwtable_write(8)
 __fwtable_write(16)
 __fwtable_write(32)
@@ -1287,6 +1388,14 @@  void intel_uncore_init(struct drm_i915_private *dev_priv)
 		ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges);
 		ASSIGN_WRITE_MMIO_VFUNCS(fwtable);
 		ASSIGN_READ_MMIO_VFUNCS(fwtable);
+		if (HAS_DECOUPLED_MMIO(dev_priv)) {
+			dev_priv->uncore.funcs.mmio_readl =
+						gen9_decoupled_read32;
+			dev_priv->uncore.funcs.mmio_readq =
+						gen9_decoupled_read64;
+			dev_priv->uncore.funcs.mmio_writel =
+						gen9_decoupled_write32;
+		}
 		break;
 	case 8:
 		if (IS_CHERRYVIEW(dev_priv)) {