Message ID | 1479230360-22395-1-git-send-email-praveen.paneri@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/11/2016 17:19, Praveen Paneri wrote: > Decoupled MMIO is an alternative way to access forcewake domain > registers, which requires less cycles for a single read/write and > avoids frequent software forcewake. > This certainly gives advantage over the forcewake as this new > mechanism “decouples” CPU cycles and allow them to complete even > when GT is in a CPD (frequency change) or C6 state. > > This can co-exist with forcewake and we will continue to use forcewake > as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords > separately and land into funny situations. > > v2: > - Moved platform check out of the function and got rid of duplicate > functions to find out decoupled power domain (Chris) > - Added a check for forcewake already held and skipped decoupled > access (Chris) > - Skipped writing 64 bit registers through decoupled MMIO (Chris) > > v3: > - Improved commit message with more info on decoupled mmio (Tvrtko) > - Changed decoupled operation to enum and used u32 instead of > uint_32 data type for register offset (Tvrtko) > - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) > - Added lookup table for converting fw_engine to pd_engine (Tvrtko) > - Improved __gen9_decoupled_read and __gen9_decoupled_write > routines (Tvrtko) > > v4: > - Fixed alignment and variable names (Chris) > - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) > > v5: > - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko) > - Sanitize info->had_decoupled_mmio at init (Chris) > > Signed-off-by: Zhe Wang <zhe1.wang@intel.com> > Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Please put "(v4)" when you carry over the r-b and it wasn't explicitly said you are OK to just keep it. > --- > drivers/gpu/drm/i915/i915_drv.h | 17 +++++- > drivers/gpu/drm/i915/i915_pci.c | 1 + > drivers/gpu/drm/i915/i915_reg.h | 7 +++ > drivers/gpu/drm/i915/intel_uncore.c | 115 ++++++++++++++++++++++++++++++++++++ > 4 files changed, 139 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 4e7148a..c1eec04 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -549,6 +549,18 @@ enum forcewake_domains { > #define FW_REG_READ (1) > #define FW_REG_WRITE (2) > > +enum decoupled_power_domain { > + GEN9_DECOUPLED_PD_BLITTER = 0, > + GEN9_DECOUPLED_PD_RENDER, > + GEN9_DECOUPLED_PD_MEDIA, > + GEN9_DECOUPLED_PD_ALL > +}; > + > +enum decoupled_ops { > + GEN9_DECOUPLED_OP_WRITE = 0, > + GEN9_DECOUPLED_OP_READ > +}; > + > enum forcewake_domains > intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, > i915_reg_t reg, unsigned int op); > @@ -683,7 +695,8 @@ struct intel_csr { > func(cursor_needs_physical); \ > func(hws_needs_physical); \ > func(overlay_needs_physical); \ > - func(supports_tv) > + func(supports_tv); \ > + func(has_decoupled_mmio) > > struct sseu_dev_info { > u8 slice_mask; > @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table { > #define GT_FREQUENCY_MULTIPLIER 50 > #define GEN9_FREQ_SCALER 3 > > +#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio) > + > #include "i915_trace.h" > > static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 70a99ce..fce8e19 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -363,6 +363,7 @@ > .has_hw_contexts = 1, > .has_logical_ring_contexts = 1, > .has_guc = 1, > + .has_decoupled_mmio = 1, > .ddb_size = 512, > GEN_DEFAULT_PIPEOFFSETS, > IVB_CURSOR_OFFSETS, > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 3361d7f..c70c07a 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7342,6 +7342,13 @@ enum { > #define SKL_FUSE_PG1_DIST_STATUS (1<<26) > #define SKL_FUSE_PG2_DIST_STATUS (1<<25) > > +/* Decoupled MMIO register pair for kernel driver */ > +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) > +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) > +#define GEN9_DECOUPLED_DW1_GO (1<<31) > +#define GEN9_DECOUPLED_PD_SHIFT 28 > +#define GEN9_DECOUPLED_OP_SHIFT 24 > + > /* Per-pipe DDI Function Control */ > #define _TRANS_DDI_FUNC_CTL_A 0x60400 > #define _TRANS_DDI_FUNC_CTL_B 0x61400 > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index e2b188d..e953303 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct drm_i915_private *dev_priv) > static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, > bool restore_forcewake) > { > + struct intel_device_info *info = mkwrite_device_info(dev_priv); > + > /* clear out unclaimed reg detection bit */ > if (check_for_unclaimed_mmio(dev_priv)) > DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); > @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, > GT_FIFO_CTL_RC6_POLICY_STALL); > } > > + /* Enable Decoupled MMIO only on BXT C stepping onwards */ > + if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) > + info->has_decoupled_mmio = false; Why have you decided to put it in here? It doesn't make much difference except conceptually. This runs every time on resume while I thought intel_device_info_runtime_init would have been more appropriate. Regards, Tvrtko > + > intel_uncore_forcewake_reset(dev_priv, restore_forcewake); > } > > @@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset) > __unclaimed_reg_debug(dev_priv, reg, read, before); > } > > +static const enum decoupled_power_domain fw2dpd_domain[] = { > + GEN9_DECOUPLED_PD_RENDER, > + GEN9_DECOUPLED_PD_BLITTER, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_MEDIA, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_ALL > +}; > + > +/* > + * Decoupled MMIO access for only 1 DWORD > + */ > +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, > + u32 reg, > + enum forcewake_domains fw_domain, > + enum decoupled_ops operation) > +{ > + enum decoupled_power_domain dp_domain; > + u32 ctrl_reg_data = 0; > + > + dp_domain = fw2dpd_domain[fw_domain - 1]; > + > + ctrl_reg_data |= reg; > + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); > + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); > + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; > + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); > + > + if (wait_for_atomic((__raw_i915_read32(dev_priv, > + GEN9_DECOUPLED_REG0_DW1) & > + GEN9_DECOUPLED_DW1_GO) == 0, > + FORCEWAKE_ACK_TIMEOUT_MS)) > + DRM_ERROR("Decoupled MMIO wait timed out\n"); > +} > + > +static inline u32 > +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, > + u32 reg, > + enum forcewake_domains fw_domain) > +{ > + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, > + GEN9_DECOUPLED_OP_READ); > + > + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); > +} > + > +static inline void > +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, > + u32 reg, u32 data, > + enum forcewake_domains fw_domain) > +{ > + > + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); > + > + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, > + GEN9_DECOUPLED_OP_WRITE); > +} > + > + > #define GEN2_READ_HEADER(x) \ > u##x val = 0; \ > assert_rpm_wakelock_held(dev_priv); > @@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, > GEN6_READ_FOOTER; \ > } > > +#define __gen9_decoupled_read(x) \ > +static u##x \ > +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ > + i915_reg_t reg, bool trace) { \ > + enum forcewake_domains fw_engine; \ > + GEN6_READ_HEADER(x); \ > + fw_engine = __fwtable_reg_read_fw_domains(offset); \ > + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ > + unsigned i; \ > + u32 *ptr_data = (u32 *) &val; \ > + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ > + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ > + offset, \ > + fw_engine); \ > + } else { \ > + val = __raw_i915_read##x(dev_priv, reg); \ > + } \ > + GEN6_READ_FOOTER; \ > +} > + > +__gen9_decoupled_read(32) > +__gen9_decoupled_read(64) > __fwtable_read(8) > __fwtable_read(16) > __fwtable_read(32) > @@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, > GEN6_WRITE_FOOTER; \ > } > > +#define __gen9_decoupled_write(x) \ > +static void \ > +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ > + i915_reg_t reg, u##x val, \ > + bool trace) { \ > + enum forcewake_domains fw_engine; \ > + GEN6_WRITE_HEADER; \ > + fw_engine = __fwtable_reg_write_fw_domains(offset); \ > + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ > + __gen9_decoupled_mmio_write(dev_priv, \ > + offset, \ > + val, \ > + fw_engine); \ > + else \ > + __raw_i915_write##x(dev_priv, reg, val); \ > + GEN6_WRITE_FOOTER; \ > +} > + > +__gen9_decoupled_write(32) > __fwtable_write(8) > __fwtable_write(16) > __fwtable_write(32) > @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) > ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); > ASSIGN_WRITE_MMIO_VFUNCS(fwtable); > ASSIGN_READ_MMIO_VFUNCS(fwtable); > + if (HAS_DECOUPLED_MMIO(dev_priv)) { > + dev_priv->uncore.funcs.mmio_readl = > + gen9_decoupled_read32; > + dev_priv->uncore.funcs.mmio_readq = > + gen9_decoupled_read64; > + dev_priv->uncore.funcs.mmio_writel = > + gen9_decoupled_write32; > + } > break; > case 8: > if (IS_CHERRYVIEW(dev_priv)) { >
Hi Tvrtko, On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote: > > On 15/11/2016 17:19, Praveen Paneri wrote: >> Decoupled MMIO is an alternative way to access forcewake domain >> registers, which requires less cycles for a single read/write and >> avoids frequent software forcewake. >> This certainly gives advantage over the forcewake as this new >> mechanism “decouples” CPU cycles and allow them to complete even >> when GT is in a CPD (frequency change) or C6 state. >> >> This can co-exist with forcewake and we will continue to use forcewake >> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >> separately and land into funny situations. >> >> v2: >> - Moved platform check out of the function and got rid of duplicate >> functions to find out decoupled power domain (Chris) >> - Added a check for forcewake already held and skipped decoupled >> access (Chris) >> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >> >> v3: >> - Improved commit message with more info on decoupled mmio (Tvrtko) >> - Changed decoupled operation to enum and used u32 instead of >> uint_32 data type for register offset (Tvrtko) >> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >> - Improved __gen9_decoupled_read and __gen9_decoupled_write >> routines (Tvrtko) >> >> v4: >> - Fixed alignment and variable names (Chris) >> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >> >> v5: >> - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko) >> - Sanitize info->had_decoupled_mmio at init (Chris) >> >> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Please put "(v4)" when you carry over the r-b and it wasn't explicitly > said you are OK to just keep it. Sure will take care going fwd. > >> --- >> drivers/gpu/drm/i915/i915_drv.h | 17 +++++- >> drivers/gpu/drm/i915/i915_pci.c | 1 + >> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >> drivers/gpu/drm/i915/intel_uncore.c | 115 >> ++++++++++++++++++++++++++++++++++++ >> 4 files changed, 139 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 4e7148a..c1eec04 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -549,6 +549,18 @@ enum forcewake_domains { >> #define FW_REG_READ (1) >> #define FW_REG_WRITE (2) >> >> +enum decoupled_power_domain { >> + GEN9_DECOUPLED_PD_BLITTER = 0, >> + GEN9_DECOUPLED_PD_RENDER, >> + GEN9_DECOUPLED_PD_MEDIA, >> + GEN9_DECOUPLED_PD_ALL >> +}; >> + >> +enum decoupled_ops { >> + GEN9_DECOUPLED_OP_WRITE = 0, >> + GEN9_DECOUPLED_OP_READ >> +}; >> + >> enum forcewake_domains >> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >> i915_reg_t reg, unsigned int op); >> @@ -683,7 +695,8 @@ struct intel_csr { >> func(cursor_needs_physical); \ >> func(hws_needs_physical); \ >> func(overlay_needs_physical); \ >> - func(supports_tv) >> + func(supports_tv); \ >> + func(has_decoupled_mmio) >> >> struct sseu_dev_info { >> u8 slice_mask; >> @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table { >> #define GT_FREQUENCY_MULTIPLIER 50 >> #define GEN9_FREQ_SCALER 3 >> >> +#define HAS_DECOUPLED_MMIO(dev_priv) >> (INTEL_INFO(dev_priv)->has_decoupled_mmio) >> + >> #include "i915_trace.h" >> >> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private >> *dev_priv) >> diff --git a/drivers/gpu/drm/i915/i915_pci.c >> b/drivers/gpu/drm/i915/i915_pci.c >> index 70a99ce..fce8e19 100644 >> --- a/drivers/gpu/drm/i915/i915_pci.c >> +++ b/drivers/gpu/drm/i915/i915_pci.c >> @@ -363,6 +363,7 @@ >> .has_hw_contexts = 1, >> .has_logical_ring_contexts = 1, >> .has_guc = 1, >> + .has_decoupled_mmio = 1, >> .ddb_size = 512, >> GEN_DEFAULT_PIPEOFFSETS, >> IVB_CURSOR_OFFSETS, >> diff --git a/drivers/gpu/drm/i915/i915_reg.h >> b/drivers/gpu/drm/i915/i915_reg.h >> index 3361d7f..c70c07a 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -7342,6 +7342,13 @@ enum { >> #define SKL_FUSE_PG1_DIST_STATUS (1<<26) >> #define SKL_FUSE_PG2_DIST_STATUS (1<<25) >> >> +/* Decoupled MMIO register pair for kernel driver */ >> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) >> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) >> +#define GEN9_DECOUPLED_DW1_GO (1<<31) >> +#define GEN9_DECOUPLED_PD_SHIFT 28 >> +#define GEN9_DECOUPLED_OP_SHIFT 24 >> + >> /* Per-pipe DDI Function Control */ >> #define _TRANS_DDI_FUNC_CTL_A 0x60400 >> #define _TRANS_DDI_FUNC_CTL_B 0x61400 >> diff --git a/drivers/gpu/drm/i915/intel_uncore.c >> b/drivers/gpu/drm/i915/intel_uncore.c >> index e2b188d..e953303 100644 >> --- a/drivers/gpu/drm/i915/intel_uncore.c >> +++ b/drivers/gpu/drm/i915/intel_uncore.c >> @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct >> drm_i915_private *dev_priv) >> static void __intel_uncore_early_sanitize(struct drm_i915_private >> *dev_priv, >> bool restore_forcewake) >> { >> + struct intel_device_info *info = mkwrite_device_info(dev_priv); >> + >> /* clear out unclaimed reg detection bit */ >> if (check_for_unclaimed_mmio(dev_priv)) >> DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); >> @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct >> drm_i915_private *dev_priv, >> GT_FIFO_CTL_RC6_POLICY_STALL); >> } >> >> + /* Enable Decoupled MMIO only on BXT C stepping onwards */ >> + if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) >> + info->has_decoupled_mmio = false; > > Why have you decided to put it in here? It doesn't make much difference > except conceptually. This runs every time on resume while I thought > intel_device_info_runtime_init would have been more appropriate. Sorry about picking the bad place for this but intel_uncore_init() which uses this info gets executed before intel_device_info_runtime_init() in driver_load sequence. If I am not wrong, we need to find an appropriate place for this. Thanks, Praveen > > Regards, > > Tvrtko > >> + >> intel_uncore_forcewake_reset(dev_priv, restore_forcewake); >> } >> >> @@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset) >> __unclaimed_reg_debug(dev_priv, reg, read, before); >> } >> >> +static const enum decoupled_power_domain fw2dpd_domain[] = { >> + GEN9_DECOUPLED_PD_RENDER, >> + GEN9_DECOUPLED_PD_BLITTER, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_MEDIA, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_ALL >> +}; >> + >> +/* >> + * Decoupled MMIO access for only 1 DWORD >> + */ >> +static void __gen9_decoupled_mmio_access(struct drm_i915_private >> *dev_priv, >> + u32 reg, >> + enum forcewake_domains fw_domain, >> + enum decoupled_ops operation) >> +{ >> + enum decoupled_power_domain dp_domain; >> + u32 ctrl_reg_data = 0; >> + >> + dp_domain = fw2dpd_domain[fw_domain - 1]; >> + >> + ctrl_reg_data |= reg; >> + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); >> + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); >> + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; >> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, >> ctrl_reg_data); >> + >> + if (wait_for_atomic((__raw_i915_read32(dev_priv, >> + GEN9_DECOUPLED_REG0_DW1) & >> + GEN9_DECOUPLED_DW1_GO) == 0, >> + FORCEWAKE_ACK_TIMEOUT_MS)) >> + DRM_ERROR("Decoupled MMIO wait timed out\n"); >> +} >> + >> +static inline u32 >> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, >> + u32 reg, >> + enum forcewake_domains fw_domain) >> +{ >> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >> + GEN9_DECOUPLED_OP_READ); >> + >> + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); >> +} >> + >> +static inline void >> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, >> + u32 reg, u32 data, >> + enum forcewake_domains fw_domain) >> +{ >> + >> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); >> + >> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >> + GEN9_DECOUPLED_OP_WRITE); >> +} >> + >> + >> #define GEN2_READ_HEADER(x) \ >> u##x val = 0; \ >> assert_rpm_wakelock_held(dev_priv); >> @@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct >> drm_i915_private *dev_priv, >> GEN6_READ_FOOTER; \ >> } >> >> +#define __gen9_decoupled_read(x) \ >> +static u##x \ >> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ >> + i915_reg_t reg, bool trace) { \ >> + enum forcewake_domains fw_engine; \ >> + GEN6_READ_HEADER(x); \ >> + fw_engine = __fwtable_reg_read_fw_domains(offset); \ >> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ >> + unsigned i; \ >> + u32 *ptr_data = (u32 *) &val; \ >> + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ >> + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ >> + offset, \ >> + fw_engine); \ >> + } else { \ >> + val = __raw_i915_read##x(dev_priv, reg); \ >> + } \ >> + GEN6_READ_FOOTER; \ >> +} >> + >> +__gen9_decoupled_read(32) >> +__gen9_decoupled_read(64) >> __fwtable_read(8) >> __fwtable_read(16) >> __fwtable_read(32) >> @@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct >> drm_i915_private *dev_priv, >> GEN6_WRITE_FOOTER; \ >> } >> >> +#define __gen9_decoupled_write(x) \ >> +static void \ >> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ >> + i915_reg_t reg, u##x val, \ >> + bool trace) { \ >> + enum forcewake_domains fw_engine; \ >> + GEN6_WRITE_HEADER; \ >> + fw_engine = __fwtable_reg_write_fw_domains(offset); \ >> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ >> + __gen9_decoupled_mmio_write(dev_priv, \ >> + offset, \ >> + val, \ >> + fw_engine); \ >> + else \ >> + __raw_i915_write##x(dev_priv, reg, val); \ >> + GEN6_WRITE_FOOTER; \ >> +} >> + >> +__gen9_decoupled_write(32) >> __fwtable_write(8) >> __fwtable_write(16) >> __fwtable_write(32) >> @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private >> *dev_priv) >> ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); >> ASSIGN_WRITE_MMIO_VFUNCS(fwtable); >> ASSIGN_READ_MMIO_VFUNCS(fwtable); >> + if (HAS_DECOUPLED_MMIO(dev_priv)) { >> + dev_priv->uncore.funcs.mmio_readl = >> + gen9_decoupled_read32; >> + dev_priv->uncore.funcs.mmio_readq = >> + gen9_decoupled_read64; >> + dev_priv->uncore.funcs.mmio_writel = >> + gen9_decoupled_write32; >> + } >> break; >> case 8: >> if (IS_CHERRYVIEW(dev_priv)) { >>
On 16/11/2016 09:03, Praveen Paneri wrote: > Hi Tvrtko, > > On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote: >> >> On 15/11/2016 17:19, Praveen Paneri wrote: >>> Decoupled MMIO is an alternative way to access forcewake domain >>> registers, which requires less cycles for a single read/write and >>> avoids frequent software forcewake. >>> This certainly gives advantage over the forcewake as this new >>> mechanism “decouples” CPU cycles and allow them to complete even >>> when GT is in a CPD (frequency change) or C6 state. >>> >>> This can co-exist with forcewake and we will continue to use forcewake >>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >>> separately and land into funny situations. >>> >>> v2: >>> - Moved platform check out of the function and got rid of duplicate >>> functions to find out decoupled power domain (Chris) >>> - Added a check for forcewake already held and skipped decoupled >>> access (Chris) >>> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >>> >>> v3: >>> - Improved commit message with more info on decoupled mmio (Tvrtko) >>> - Changed decoupled operation to enum and used u32 instead of >>> uint_32 data type for register offset (Tvrtko) >>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >>> - Improved __gen9_decoupled_read and __gen9_decoupled_write >>> routines (Tvrtko) >>> >>> v4: >>> - Fixed alignment and variable names (Chris) >>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >>> >>> v5: >>> - Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko) >>> - Sanitize info->had_decoupled_mmio at init (Chris) >>> >>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Please put "(v4)" when you carry over the r-b and it wasn't explicitly >> said you are OK to just keep it. > Sure will take care going fwd. >> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 17 +++++- >>> drivers/gpu/drm/i915/i915_pci.c | 1 + >>> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >>> drivers/gpu/drm/i915/intel_uncore.c | 115 >>> ++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 139 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>> b/drivers/gpu/drm/i915/i915_drv.h >>> index 4e7148a..c1eec04 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -549,6 +549,18 @@ enum forcewake_domains { >>> #define FW_REG_READ (1) >>> #define FW_REG_WRITE (2) >>> >>> +enum decoupled_power_domain { >>> + GEN9_DECOUPLED_PD_BLITTER = 0, >>> + GEN9_DECOUPLED_PD_RENDER, >>> + GEN9_DECOUPLED_PD_MEDIA, >>> + GEN9_DECOUPLED_PD_ALL >>> +}; >>> + >>> +enum decoupled_ops { >>> + GEN9_DECOUPLED_OP_WRITE = 0, >>> + GEN9_DECOUPLED_OP_READ >>> +}; >>> + >>> enum forcewake_domains >>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >>> i915_reg_t reg, unsigned int op); >>> @@ -683,7 +695,8 @@ struct intel_csr { >>> func(cursor_needs_physical); \ >>> func(hws_needs_physical); \ >>> func(overlay_needs_physical); \ >>> - func(supports_tv) >>> + func(supports_tv); \ >>> + func(has_decoupled_mmio) >>> >>> struct sseu_dev_info { >>> u8 slice_mask; >>> @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table { >>> #define GT_FREQUENCY_MULTIPLIER 50 >>> #define GEN9_FREQ_SCALER 3 >>> >>> +#define HAS_DECOUPLED_MMIO(dev_priv) >>> (INTEL_INFO(dev_priv)->has_decoupled_mmio) >>> + >>> #include "i915_trace.h" >>> >>> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private >>> *dev_priv) >>> diff --git a/drivers/gpu/drm/i915/i915_pci.c >>> b/drivers/gpu/drm/i915/i915_pci.c >>> index 70a99ce..fce8e19 100644 >>> --- a/drivers/gpu/drm/i915/i915_pci.c >>> +++ b/drivers/gpu/drm/i915/i915_pci.c >>> @@ -363,6 +363,7 @@ >>> .has_hw_contexts = 1, >>> .has_logical_ring_contexts = 1, >>> .has_guc = 1, >>> + .has_decoupled_mmio = 1, >>> .ddb_size = 512, >>> GEN_DEFAULT_PIPEOFFSETS, >>> IVB_CURSOR_OFFSETS, >>> diff --git a/drivers/gpu/drm/i915/i915_reg.h >>> b/drivers/gpu/drm/i915/i915_reg.h >>> index 3361d7f..c70c07a 100644 >>> --- a/drivers/gpu/drm/i915/i915_reg.h >>> +++ b/drivers/gpu/drm/i915/i915_reg.h >>> @@ -7342,6 +7342,13 @@ enum { >>> #define SKL_FUSE_PG1_DIST_STATUS (1<<26) >>> #define SKL_FUSE_PG2_DIST_STATUS (1<<25) >>> >>> +/* Decoupled MMIO register pair for kernel driver */ >>> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) >>> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) >>> +#define GEN9_DECOUPLED_DW1_GO (1<<31) >>> +#define GEN9_DECOUPLED_PD_SHIFT 28 >>> +#define GEN9_DECOUPLED_OP_SHIFT 24 >>> + >>> /* Per-pipe DDI Function Control */ >>> #define _TRANS_DDI_FUNC_CTL_A 0x60400 >>> #define _TRANS_DDI_FUNC_CTL_B 0x61400 >>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c >>> b/drivers/gpu/drm/i915/intel_uncore.c >>> index e2b188d..e953303 100644 >>> --- a/drivers/gpu/drm/i915/intel_uncore.c >>> +++ b/drivers/gpu/drm/i915/intel_uncore.c >>> @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct >>> drm_i915_private *dev_priv) >>> static void __intel_uncore_early_sanitize(struct drm_i915_private >>> *dev_priv, >>> bool restore_forcewake) >>> { >>> + struct intel_device_info *info = mkwrite_device_info(dev_priv); >>> + >>> /* clear out unclaimed reg detection bit */ >>> if (check_for_unclaimed_mmio(dev_priv)) >>> DRM_DEBUG("unclaimed mmio detected on uncore init, >>> clearing\n"); >>> @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct >>> drm_i915_private *dev_priv, >>> GT_FIFO_CTL_RC6_POLICY_STALL); >>> } >>> >>> + /* Enable Decoupled MMIO only on BXT C stepping onwards */ >>> + if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) >>> + info->has_decoupled_mmio = false; >> >> Why have you decided to put it in here? It doesn't make much difference >> except conceptually. This runs every time on resume while I thought >> intel_device_info_runtime_init would have been more appropriate. > Sorry about picking the bad place for this but intel_uncore_init() which > uses this info gets executed before intel_device_info_runtime_init() in > driver_load sequence. If I am not wrong, we need to find an appropriate > place for this. You are correct, very illogical. Someone else can fix that so: Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
On Wed, Nov 16, 2016 at 09:08:30AM +0000, Tvrtko Ursulin wrote: > > On 16/11/2016 09:03, Praveen Paneri wrote: > >Hi Tvrtko, > > > >On Wednesday 16 November 2016 01:55 PM, Tvrtko Ursulin wrote: > >> > >>On 15/11/2016 17:19, Praveen Paneri wrote: > >>>Decoupled MMIO is an alternative way to access forcewake domain > >>>registers, which requires less cycles for a single read/write and > >>>avoids frequent software forcewake. > >>>This certainly gives advantage over the forcewake as this new > >>>mechanism “decouples” CPU cycles and allow them to complete even > >>>when GT is in a CPD (frequency change) or C6 state. > >>> > >>>This can co-exist with forcewake and we will continue to use forcewake > >>>as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords > >>>separately and land into funny situations. > >>> > >>>v2: > >>>- Moved platform check out of the function and got rid of duplicate > >>> functions to find out decoupled power domain (Chris) > >>>- Added a check for forcewake already held and skipped decoupled > >>> access (Chris) > >>>- Skipped writing 64 bit registers through decoupled MMIO (Chris) > >>> > >>>v3: > >>>- Improved commit message with more info on decoupled mmio (Tvrtko) > >>>- Changed decoupled operation to enum and used u32 instead of > >>> uint_32 data type for register offset (Tvrtko) > >>>- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) > >>>- Added lookup table for converting fw_engine to pd_engine (Tvrtko) > >>>- Improved __gen9_decoupled_read and __gen9_decoupled_write > >>> routines (Tvrtko) > >>> > >>>v4: > >>>- Fixed alignment and variable names (Chris) > >>>- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) > >>> > >>>v5: > >>>- Changed HAS_DECOUPLED_MMIO() argument name to dev_priv (Tvrtko) > >>>- Sanitize info->had_decoupled_mmio at init (Chris) > >>> > >>>Signed-off-by: Zhe Wang <zhe1.wang@intel.com> > >>>Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> > >>>Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > >> > >>Please put "(v4)" when you carry over the r-b and it wasn't explicitly > >>said you are OK to just keep it. > >Sure will take care going fwd. > >> > >>>--- > >>> drivers/gpu/drm/i915/i915_drv.h | 17 +++++- > >>> drivers/gpu/drm/i915/i915_pci.c | 1 + > >>> drivers/gpu/drm/i915/i915_reg.h | 7 +++ > >>> drivers/gpu/drm/i915/intel_uncore.c | 115 > >>>++++++++++++++++++++++++++++++++++++ > >>> 4 files changed, 139 insertions(+), 1 deletion(-) > >>> > >>>diff --git a/drivers/gpu/drm/i915/i915_drv.h > >>>b/drivers/gpu/drm/i915/i915_drv.h > >>>index 4e7148a..c1eec04 100644 > >>>--- a/drivers/gpu/drm/i915/i915_drv.h > >>>+++ b/drivers/gpu/drm/i915/i915_drv.h > >>>@@ -549,6 +549,18 @@ enum forcewake_domains { > >>> #define FW_REG_READ (1) > >>> #define FW_REG_WRITE (2) > >>> > >>>+enum decoupled_power_domain { > >>>+ GEN9_DECOUPLED_PD_BLITTER = 0, > >>>+ GEN9_DECOUPLED_PD_RENDER, > >>>+ GEN9_DECOUPLED_PD_MEDIA, > >>>+ GEN9_DECOUPLED_PD_ALL > >>>+}; > >>>+ > >>>+enum decoupled_ops { > >>>+ GEN9_DECOUPLED_OP_WRITE = 0, > >>>+ GEN9_DECOUPLED_OP_READ > >>>+}; > >>>+ > >>> enum forcewake_domains > >>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, > >>> i915_reg_t reg, unsigned int op); > >>>@@ -683,7 +695,8 @@ struct intel_csr { > >>> func(cursor_needs_physical); \ > >>> func(hws_needs_physical); \ > >>> func(overlay_needs_physical); \ > >>>- func(supports_tv) > >>>+ func(supports_tv); \ > >>>+ func(has_decoupled_mmio) > >>> > >>> struct sseu_dev_info { > >>> u8 slice_mask; > >>>@@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table { > >>> #define GT_FREQUENCY_MULTIPLIER 50 > >>> #define GEN9_FREQ_SCALER 3 > >>> > >>>+#define HAS_DECOUPLED_MMIO(dev_priv) > >>>(INTEL_INFO(dev_priv)->has_decoupled_mmio) > >>>+ > >>> #include "i915_trace.h" > >>> > >>> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private > >>>*dev_priv) > >>>diff --git a/drivers/gpu/drm/i915/i915_pci.c > >>>b/drivers/gpu/drm/i915/i915_pci.c > >>>index 70a99ce..fce8e19 100644 > >>>--- a/drivers/gpu/drm/i915/i915_pci.c > >>>+++ b/drivers/gpu/drm/i915/i915_pci.c > >>>@@ -363,6 +363,7 @@ > >>> .has_hw_contexts = 1, > >>> .has_logical_ring_contexts = 1, > >>> .has_guc = 1, > >>>+ .has_decoupled_mmio = 1, > >>> .ddb_size = 512, > >>> GEN_DEFAULT_PIPEOFFSETS, > >>> IVB_CURSOR_OFFSETS, > >>>diff --git a/drivers/gpu/drm/i915/i915_reg.h > >>>b/drivers/gpu/drm/i915/i915_reg.h > >>>index 3361d7f..c70c07a 100644 > >>>--- a/drivers/gpu/drm/i915/i915_reg.h > >>>+++ b/drivers/gpu/drm/i915/i915_reg.h > >>>@@ -7342,6 +7342,13 @@ enum { > >>> #define SKL_FUSE_PG1_DIST_STATUS (1<<26) > >>> #define SKL_FUSE_PG2_DIST_STATUS (1<<25) > >>> > >>>+/* Decoupled MMIO register pair for kernel driver */ > >>>+#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) > >>>+#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) > >>>+#define GEN9_DECOUPLED_DW1_GO (1<<31) > >>>+#define GEN9_DECOUPLED_PD_SHIFT 28 > >>>+#define GEN9_DECOUPLED_OP_SHIFT 24 > >>>+ > >>> /* Per-pipe DDI Function Control */ > >>> #define _TRANS_DDI_FUNC_CTL_A 0x60400 > >>> #define _TRANS_DDI_FUNC_CTL_B 0x61400 > >>>diff --git a/drivers/gpu/drm/i915/intel_uncore.c > >>>b/drivers/gpu/drm/i915/intel_uncore.c > >>>index e2b188d..e953303 100644 > >>>--- a/drivers/gpu/drm/i915/intel_uncore.c > >>>+++ b/drivers/gpu/drm/i915/intel_uncore.c > >>>@@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct > >>>drm_i915_private *dev_priv) > >>> static void __intel_uncore_early_sanitize(struct drm_i915_private > >>>*dev_priv, > >>> bool restore_forcewake) > >>> { > >>>+ struct intel_device_info *info = mkwrite_device_info(dev_priv); > >>>+ > >>> /* clear out unclaimed reg detection bit */ > >>> if (check_for_unclaimed_mmio(dev_priv)) > >>> DRM_DEBUG("unclaimed mmio detected on uncore init, > >>>clearing\n"); > >>>@@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct > >>>drm_i915_private *dev_priv, > >>> GT_FIFO_CTL_RC6_POLICY_STALL); > >>> } > >>> > >>>+ /* Enable Decoupled MMIO only on BXT C stepping onwards */ > >>>+ if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) > >>>+ info->has_decoupled_mmio = false; > >> > >>Why have you decided to put it in here? It doesn't make much difference > >>except conceptually. This runs every time on resume while I thought > >>intel_device_info_runtime_init would have been more appropriate. > >Sorry about picking the bad place for this but intel_uncore_init() which > >uses this info gets executed before intel_device_info_runtime_init() in > >driver_load sequence. If I am not wrong, we need to find an appropriate > >place for this. > > You are correct, very illogical. Someone else can fix that so: Ugh. It has some post-display fixups as well as early sanitization. As soon as someone is finished with purging the mix of dev/dev_priv they may feel inclined to split that up appropriately... -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4e7148a..c1eec04 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -549,6 +549,18 @@ enum forcewake_domains { #define FW_REG_READ (1) #define FW_REG_WRITE (2) +enum decoupled_power_domain { + GEN9_DECOUPLED_PD_BLITTER = 0, + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL +}; + +enum decoupled_ops { + GEN9_DECOUPLED_OP_WRITE = 0, + GEN9_DECOUPLED_OP_READ +}; + enum forcewake_domains intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, i915_reg_t reg, unsigned int op); @@ -683,7 +695,8 @@ struct intel_csr { func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ - func(supports_tv) + func(supports_tv); \ + func(has_decoupled_mmio) struct sseu_dev_info { u8 slice_mask; @@ -2652,6 +2665,8 @@ struct drm_i915_cmd_table { #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 +#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio) + #include "i915_trace.h" static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 70a99ce..fce8e19 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -363,6 +363,7 @@ .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_guc = 1, + .has_decoupled_mmio = 1, .ddb_size = 512, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3361d7f..c70c07a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7342,6 +7342,13 @@ enum { #define SKL_FUSE_PG1_DIST_STATUS (1<<26) #define SKL_FUSE_PG2_DIST_STATUS (1<<25) +/* Decoupled MMIO register pair for kernel driver */ +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) +#define GEN9_DECOUPLED_DW1_GO (1<<31) +#define GEN9_DECOUPLED_PD_SHIFT 28 +#define GEN9_DECOUPLED_OP_SHIFT 24 + /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e2b188d..e953303 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -402,6 +402,8 @@ static void intel_uncore_edram_detect(struct drm_i915_private *dev_priv) static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, bool restore_forcewake) { + struct intel_device_info *info = mkwrite_device_info(dev_priv); + /* clear out unclaimed reg detection bit */ if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, GT_FIFO_CTL_RC6_POLICY_STALL); } + /* Enable Decoupled MMIO only on BXT C stepping onwards */ + if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + info->has_decoupled_mmio = false; + intel_uncore_forcewake_reset(dev_priv, restore_forcewake); } @@ -831,6 +837,66 @@ static bool is_gen8_shadowed(u32 offset) __unclaimed_reg_debug(dev_priv, reg, read, before); } +static const enum decoupled_power_domain fw2dpd_domain[] = { + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_BLITTER, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL +}; + +/* + * Decoupled MMIO access for only 1 DWORD + */ +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain, + enum decoupled_ops operation) +{ + enum decoupled_power_domain dp_domain; + u32 ctrl_reg_data = 0; + + dp_domain = fw2dpd_domain[fw_domain - 1]; + + ctrl_reg_data |= reg; + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + GEN9_DECOUPLED_REG0_DW1) & + GEN9_DECOUPLED_DW1_GO) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Decoupled MMIO wait timed out\n"); +} + +static inline u32 +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain) +{ + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_READ); + + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); +} + +static inline void +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, + u32 reg, u32 data, + enum forcewake_domains fw_domain) +{ + + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); + + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_WRITE); +} + + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(dev_priv); @@ -935,6 +1001,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, GEN6_READ_FOOTER; \ } +#define __gen9_decoupled_read(x) \ +static u##x \ +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_READ_HEADER(x); \ + fw_engine = __fwtable_reg_read_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ + unsigned i; \ + u32 *ptr_data = (u32 *) &val; \ + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ + offset, \ + fw_engine); \ + } else { \ + val = __raw_i915_read##x(dev_priv, reg); \ + } \ + GEN6_READ_FOOTER; \ +} + +__gen9_decoupled_read(32) +__gen9_decoupled_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1064,6 +1152,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, GEN6_WRITE_FOOTER; \ } +#define __gen9_decoupled_write(x) \ +static void \ +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, u##x val, \ + bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_WRITE_HEADER; \ + fw_engine = __fwtable_reg_write_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ + __gen9_decoupled_mmio_write(dev_priv, \ + offset, \ + val, \ + fw_engine); \ + else \ + __raw_i915_write##x(dev_priv, reg, val); \ + GEN6_WRITE_FOOTER; \ +} + +__gen9_decoupled_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(fwtable); ASSIGN_READ_MMIO_VFUNCS(fwtable); + if (HAS_DECOUPLED_MMIO(dev_priv)) { + dev_priv->uncore.funcs.mmio_readl = + gen9_decoupled_read32; + dev_priv->uncore.funcs.mmio_readq = + gen9_decoupled_read64; + dev_priv->uncore.funcs.mmio_writel = + gen9_decoupled_write32; + } break; case 8: if (IS_CHERRYVIEW(dev_priv)) {