Message ID | 1479192015-4247-1-git-send-email-praveen.paneri@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 15/11/2016 06:40, Praveen Paneri wrote: > Decoupled MMIO is an alternative way to access forcewake domain > registers, which requires less cycles for a single read/write and > avoids frequent software forcewake. > This certainly gives advantage over the forcewake as this new > mechanism “decouples” CPU cycles and allow them to complete even > when GT is in a CPD (frequency change) or C6 state. > > This can co-exist with forcewake and we will continue to use forcewake > as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords > separately and land into funny situations. > > v2: > - Moved platform check out of the function and got rid of duplicate > functions to find out decoupled power domain (Chris) > - Added a check for forcewake already held and skipped decoupled > access (Chris) > - Skipped writing 64 bit registers through decoupled MMIO (Chris) > > v3: > - Improved commit message with more info on decoupled mmio (Tvrtko) > - Changed decoupled operation to enum and used u32 instead of > uint_32 data type for register offset (Tvrtko) > - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) > - Added lookup table for converting fw_engine to pd_engine (Tvrtko) > - Improved __gen9_decoupled_read and __gen9_decoupled_write > routines (Tvrtko) > > v4: > - Fixed alignment and variable names (Chris) > - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) > > Signed-off-by: Zhe Wang <zhe1.wang@intel.com> > Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 18 +++++- > drivers/gpu/drm/i915/i915_pci.c | 1 + > drivers/gpu/drm/i915/i915_reg.h | 7 +++ > drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++ > 4 files changed, 134 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 4e7148a..158f05c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -549,6 +549,18 @@ enum forcewake_domains { > #define FW_REG_READ (1) > #define FW_REG_WRITE (2) > > +enum decoupled_power_domain { > + GEN9_DECOUPLED_PD_BLITTER = 0, > + GEN9_DECOUPLED_PD_RENDER, > + GEN9_DECOUPLED_PD_MEDIA, > + GEN9_DECOUPLED_PD_ALL > +}; > + > +enum decoupled_ops { > + GEN9_DECOUPLED_OP_WRITE = 0, > + GEN9_DECOUPLED_OP_READ > +}; > + > enum forcewake_domains > intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, > i915_reg_t reg, unsigned int op); > @@ -683,7 +695,8 @@ struct intel_csr { > func(cursor_needs_physical); \ > func(hws_needs_physical); \ > func(overlay_needs_physical); \ > - func(supports_tv) > + func(supports_tv); \ > + func(has_decoupled_mmio) > > struct sseu_dev_info { > u8 slice_mask; > @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { > #define GT_FREQUENCY_MULTIPLIER 50 > #define GEN9_FREQ_SCALER 3 > > +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ > + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) > + Looks like I've missed this before, but Would you mind renaming the dev argument to dev_priv? > #include "i915_trace.h" > > static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 70a99ce..fce8e19 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -363,6 +363,7 @@ > .has_hw_contexts = 1, > .has_logical_ring_contexts = 1, > .has_guc = 1, > + .has_decoupled_mmio = 1, > .ddb_size = 512, > GEN_DEFAULT_PIPEOFFSETS, > IVB_CURSOR_OFFSETS, > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 3361d7f..c70c07a 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -7342,6 +7342,13 @@ enum { > #define SKL_FUSE_PG1_DIST_STATUS (1<<26) > #define SKL_FUSE_PG2_DIST_STATUS (1<<25) > > +/* Decoupled MMIO register pair for kernel driver */ > +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) > +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) > +#define GEN9_DECOUPLED_DW1_GO (1<<31) > +#define GEN9_DECOUPLED_PD_SHIFT 28 > +#define GEN9_DECOUPLED_OP_SHIFT 24 > + > /* Per-pipe DDI Function Control */ > #define _TRANS_DDI_FUNC_CTL_A 0x60400 > #define _TRANS_DDI_FUNC_CTL_B 0x61400 > diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c > index e2b188d..e4c50cb 100644 > --- a/drivers/gpu/drm/i915/intel_uncore.c > +++ b/drivers/gpu/drm/i915/intel_uncore.c > @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset) > __unclaimed_reg_debug(dev_priv, reg, read, before); > } > > +static const enum decoupled_power_domain fw2dpd_domain[] = { > + GEN9_DECOUPLED_PD_RENDER, > + GEN9_DECOUPLED_PD_BLITTER, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_MEDIA, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_ALL, > + GEN9_DECOUPLED_PD_ALL > +}; > + > +/* > + * Decoupled MMIO access for only 1 DWORD > + */ > +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, > + u32 reg, > + enum forcewake_domains fw_domain, > + enum decoupled_ops operation) > +{ > + enum decoupled_power_domain dp_domain; > + u32 ctrl_reg_data = 0; > + > + dp_domain = fw2dpd_domain[fw_domain - 1]; > + > + ctrl_reg_data |= reg; > + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); > + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); > + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; > + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); > + > + if (wait_for_atomic((__raw_i915_read32(dev_priv, > + GEN9_DECOUPLED_REG0_DW1) & > + GEN9_DECOUPLED_DW1_GO) == 0, > + FORCEWAKE_ACK_TIMEOUT_MS)) > + DRM_ERROR("Decoupled MMIO wait timed out\n"); > +} > + > +static inline u32 > +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, > + u32 reg, > + enum forcewake_domains fw_domain) > +{ > + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, > + GEN9_DECOUPLED_OP_READ); > + > + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); > +} > + > +static inline void > +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, > + u32 reg, u32 data, > + enum forcewake_domains fw_domain) > +{ > + > + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); > + > + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, > + GEN9_DECOUPLED_OP_WRITE); > +} > + > + > #define GEN2_READ_HEADER(x) \ > u##x val = 0; \ > assert_rpm_wakelock_held(dev_priv); > @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, > GEN6_READ_FOOTER; \ > } > > +#define __gen9_decoupled_read(x) \ > +static u##x \ > +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ > + i915_reg_t reg, bool trace) { \ > + enum forcewake_domains fw_engine; \ > + GEN6_READ_HEADER(x); \ > + fw_engine = __fwtable_reg_read_fw_domains(offset); \ > + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ > + unsigned i; \ > + u32 *ptr_data = (u32 *) &val; \ > + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ > + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ > + offset, \ > + fw_engine); \ > + } else { \ > + val = __raw_i915_read##x(dev_priv, reg); \ > + } \ > + GEN6_READ_FOOTER; \ > +} > + > +__gen9_decoupled_read(32) > +__gen9_decoupled_read(64) > __fwtable_read(8) > __fwtable_read(16) > __fwtable_read(32) > @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, > GEN6_WRITE_FOOTER; \ > } > > +#define __gen9_decoupled_write(x) \ > +static void \ > +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ > + i915_reg_t reg, u##x val, \ > + bool trace) { \ > + enum forcewake_domains fw_engine; \ > + GEN6_WRITE_HEADER; \ > + fw_engine = __fwtable_reg_write_fw_domains(offset); \ > + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ > + __gen9_decoupled_mmio_write(dev_priv, \ > + offset, \ > + val, \ > + fw_engine); \ > + else \ > + __raw_i915_write##x(dev_priv, reg, val); \ > + GEN6_WRITE_FOOTER; \ > +} > + > +__gen9_decoupled_write(32) > __fwtable_write(8) > __fwtable_write(16) > __fwtable_write(32) > @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) > ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); > ASSIGN_WRITE_MMIO_VFUNCS(fwtable); > ASSIGN_READ_MMIO_VFUNCS(fwtable); > + if (HAS_DECOUPLED_MMIO(dev_priv)) { > + dev_priv->uncore.funcs.mmio_readl = > + gen9_decoupled_read32; > + dev_priv->uncore.funcs.mmio_readq = > + gen9_decoupled_read64; > + dev_priv->uncore.funcs.mmio_writel = > + gen9_decoupled_write32; > + } > break; > case 8: > if (IS_CHERRYVIEW(dev_priv)) { > With the rename fix above: Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote: > > On 15/11/2016 06:40, Praveen Paneri wrote: > >Decoupled MMIO is an alternative way to access forcewake domain > >registers, which requires less cycles for a single read/write and > >avoids frequent software forcewake. > >This certainly gives advantage over the forcewake as this new > >mechanism “decouples” CPU cycles and allow them to complete even > >when GT is in a CPD (frequency change) or C6 state. > > > >This can co-exist with forcewake and we will continue to use forcewake > >as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords > >separately and land into funny situations. > > > >v2: > >- Moved platform check out of the function and got rid of duplicate > > functions to find out decoupled power domain (Chris) > >- Added a check for forcewake already held and skipped decoupled > > access (Chris) > >- Skipped writing 64 bit registers through decoupled MMIO (Chris) > > > >v3: > >- Improved commit message with more info on decoupled mmio (Tvrtko) > >- Changed decoupled operation to enum and used u32 instead of > > uint_32 data type for register offset (Tvrtko) > >- Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) > >- Added lookup table for converting fw_engine to pd_engine (Tvrtko) > >- Improved __gen9_decoupled_read and __gen9_decoupled_write > > routines (Tvrtko) > > > >v4: > >- Fixed alignment and variable names (Chris) > >- Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) > > > >Signed-off-by: Zhe Wang <zhe1.wang@intel.com> > >Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> > >--- > > drivers/gpu/drm/i915/i915_drv.h | 18 +++++- > > drivers/gpu/drm/i915/i915_pci.c | 1 + > > drivers/gpu/drm/i915/i915_reg.h | 7 +++ > > drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++ > > 4 files changed, 134 insertions(+), 1 deletion(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > >index 4e7148a..158f05c 100644 > >--- a/drivers/gpu/drm/i915/i915_drv.h > >+++ b/drivers/gpu/drm/i915/i915_drv.h > >@@ -549,6 +549,18 @@ enum forcewake_domains { > > #define FW_REG_READ (1) > > #define FW_REG_WRITE (2) > > > >+enum decoupled_power_domain { > >+ GEN9_DECOUPLED_PD_BLITTER = 0, > >+ GEN9_DECOUPLED_PD_RENDER, > >+ GEN9_DECOUPLED_PD_MEDIA, > >+ GEN9_DECOUPLED_PD_ALL > >+}; > >+ > >+enum decoupled_ops { > >+ GEN9_DECOUPLED_OP_WRITE = 0, > >+ GEN9_DECOUPLED_OP_READ > >+}; > >+ > > enum forcewake_domains > > intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, > > i915_reg_t reg, unsigned int op); > >@@ -683,7 +695,8 @@ struct intel_csr { > > func(cursor_needs_physical); \ > > func(hws_needs_physical); \ > > func(overlay_needs_physical); \ > >- func(supports_tv) > >+ func(supports_tv); \ > >+ func(has_decoupled_mmio) > > > > struct sseu_dev_info { > > u8 slice_mask; > >@@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { > > #define GT_FREQUENCY_MULTIPLIER 50 > > #define GEN9_FREQ_SCALER 3 > > > >+#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ > >+ && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) > >+ > > Looks like I've missed this before, but Would you mind renaming the > dev argument to dev_priv? And whilst you are there, fix up the code to set info->has_decoupled_mmio = false when santizing. -Chris
Hi Tvrtko, On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote: > > On 15/11/2016 06:40, Praveen Paneri wrote: >> Decoupled MMIO is an alternative way to access forcewake domain >> registers, which requires less cycles for a single read/write and >> avoids frequent software forcewake. >> This certainly gives advantage over the forcewake as this new >> mechanism “decouples” CPU cycles and allow them to complete even >> when GT is in a CPD (frequency change) or C6 state. >> >> This can co-exist with forcewake and we will continue to use forcewake >> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >> separately and land into funny situations. >> >> v2: >> - Moved platform check out of the function and got rid of duplicate >> functions to find out decoupled power domain (Chris) >> - Added a check for forcewake already held and skipped decoupled >> access (Chris) >> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >> >> v3: >> - Improved commit message with more info on decoupled mmio (Tvrtko) >> - Changed decoupled operation to enum and used u32 instead of >> uint_32 data type for register offset (Tvrtko) >> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >> - Improved __gen9_decoupled_read and __gen9_decoupled_write >> routines (Tvrtko) >> >> v4: >> - Fixed alignment and variable names (Chris) >> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >> >> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 18 +++++- >> drivers/gpu/drm/i915/i915_pci.c | 1 + >> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >> drivers/gpu/drm/i915/intel_uncore.c | 109 >> ++++++++++++++++++++++++++++++++++++ >> 4 files changed, 134 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 4e7148a..158f05c 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -549,6 +549,18 @@ enum forcewake_domains { >> #define FW_REG_READ (1) >> #define FW_REG_WRITE (2) >> >> +enum decoupled_power_domain { >> + GEN9_DECOUPLED_PD_BLITTER = 0, >> + GEN9_DECOUPLED_PD_RENDER, >> + GEN9_DECOUPLED_PD_MEDIA, >> + GEN9_DECOUPLED_PD_ALL >> +}; >> + >> +enum decoupled_ops { >> + GEN9_DECOUPLED_OP_WRITE = 0, >> + GEN9_DECOUPLED_OP_READ >> +}; >> + >> enum forcewake_domains >> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >> i915_reg_t reg, unsigned int op); >> @@ -683,7 +695,8 @@ struct intel_csr { >> func(cursor_needs_physical); \ >> func(hws_needs_physical); \ >> func(overlay_needs_physical); \ >> - func(supports_tv) >> + func(supports_tv); \ >> + func(has_decoupled_mmio) >> >> struct sseu_dev_info { >> u8 slice_mask; >> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { >> #define GT_FREQUENCY_MULTIPLIER 50 >> #define GEN9_FREQ_SCALER 3 >> >> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ >> + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) >> + > > Looks like I've missed this before, but Would you mind renaming the dev > argument to dev_priv? > >> #include "i915_trace.h" >> >> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private >> *dev_priv) >> diff --git a/drivers/gpu/drm/i915/i915_pci.c >> b/drivers/gpu/drm/i915/i915_pci.c >> index 70a99ce..fce8e19 100644 >> --- a/drivers/gpu/drm/i915/i915_pci.c >> +++ b/drivers/gpu/drm/i915/i915_pci.c >> @@ -363,6 +363,7 @@ >> .has_hw_contexts = 1, >> .has_logical_ring_contexts = 1, >> .has_guc = 1, >> + .has_decoupled_mmio = 1, >> .ddb_size = 512, >> GEN_DEFAULT_PIPEOFFSETS, >> IVB_CURSOR_OFFSETS, >> diff --git a/drivers/gpu/drm/i915/i915_reg.h >> b/drivers/gpu/drm/i915/i915_reg.h >> index 3361d7f..c70c07a 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -7342,6 +7342,13 @@ enum { >> #define SKL_FUSE_PG1_DIST_STATUS (1<<26) >> #define SKL_FUSE_PG2_DIST_STATUS (1<<25) >> >> +/* Decoupled MMIO register pair for kernel driver */ >> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) >> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) >> +#define GEN9_DECOUPLED_DW1_GO (1<<31) >> +#define GEN9_DECOUPLED_PD_SHIFT 28 >> +#define GEN9_DECOUPLED_OP_SHIFT 24 >> + >> /* Per-pipe DDI Function Control */ >> #define _TRANS_DDI_FUNC_CTL_A 0x60400 >> #define _TRANS_DDI_FUNC_CTL_B 0x61400 >> diff --git a/drivers/gpu/drm/i915/intel_uncore.c >> b/drivers/gpu/drm/i915/intel_uncore.c >> index e2b188d..e4c50cb 100644 >> --- a/drivers/gpu/drm/i915/intel_uncore.c >> +++ b/drivers/gpu/drm/i915/intel_uncore.c >> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset) >> __unclaimed_reg_debug(dev_priv, reg, read, before); >> } >> >> +static const enum decoupled_power_domain fw2dpd_domain[] = { >> + GEN9_DECOUPLED_PD_RENDER, >> + GEN9_DECOUPLED_PD_BLITTER, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_MEDIA, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_ALL, >> + GEN9_DECOUPLED_PD_ALL >> +}; >> + >> +/* >> + * Decoupled MMIO access for only 1 DWORD >> + */ >> +static void __gen9_decoupled_mmio_access(struct drm_i915_private >> *dev_priv, >> + u32 reg, >> + enum forcewake_domains fw_domain, >> + enum decoupled_ops operation) >> +{ >> + enum decoupled_power_domain dp_domain; >> + u32 ctrl_reg_data = 0; >> + >> + dp_domain = fw2dpd_domain[fw_domain - 1]; >> + >> + ctrl_reg_data |= reg; >> + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); >> + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); >> + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; >> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, >> ctrl_reg_data); >> + >> + if (wait_for_atomic((__raw_i915_read32(dev_priv, >> + GEN9_DECOUPLED_REG0_DW1) & >> + GEN9_DECOUPLED_DW1_GO) == 0, >> + FORCEWAKE_ACK_TIMEOUT_MS)) >> + DRM_ERROR("Decoupled MMIO wait timed out\n"); >> +} >> + >> +static inline u32 >> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, >> + u32 reg, >> + enum forcewake_domains fw_domain) >> +{ >> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >> + GEN9_DECOUPLED_OP_READ); >> + >> + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); >> +} >> + >> +static inline void >> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, >> + u32 reg, u32 data, >> + enum forcewake_domains fw_domain) >> +{ >> + >> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); >> + >> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >> + GEN9_DECOUPLED_OP_WRITE); >> +} >> + >> + >> #define GEN2_READ_HEADER(x) \ >> u##x val = 0; \ >> assert_rpm_wakelock_held(dev_priv); >> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct >> drm_i915_private *dev_priv, >> GEN6_READ_FOOTER; \ >> } >> >> +#define __gen9_decoupled_read(x) \ >> +static u##x \ >> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ >> + i915_reg_t reg, bool trace) { \ >> + enum forcewake_domains fw_engine; \ >> + GEN6_READ_HEADER(x); \ >> + fw_engine = __fwtable_reg_read_fw_domains(offset); \ >> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ >> + unsigned i; \ >> + u32 *ptr_data = (u32 *) &val; \ >> + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ >> + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ >> + offset, \ >> + fw_engine); \ >> + } else { \ >> + val = __raw_i915_read##x(dev_priv, reg); \ >> + } \ >> + GEN6_READ_FOOTER; \ >> +} >> + >> +__gen9_decoupled_read(32) >> +__gen9_decoupled_read(64) >> __fwtable_read(8) >> __fwtable_read(16) >> __fwtable_read(32) >> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct >> drm_i915_private *dev_priv, >> GEN6_WRITE_FOOTER; \ >> } >> >> +#define __gen9_decoupled_write(x) \ >> +static void \ >> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ >> + i915_reg_t reg, u##x val, \ >> + bool trace) { \ >> + enum forcewake_domains fw_engine; \ >> + GEN6_WRITE_HEADER; \ >> + fw_engine = __fwtable_reg_write_fw_domains(offset); \ >> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ >> + __gen9_decoupled_mmio_write(dev_priv, \ >> + offset, \ >> + val, \ >> + fw_engine); \ >> + else \ >> + __raw_i915_write##x(dev_priv, reg, val); \ >> + GEN6_WRITE_FOOTER; \ >> +} >> + >> +__gen9_decoupled_write(32) >> __fwtable_write(8) >> __fwtable_write(16) >> __fwtable_write(32) >> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private >> *dev_priv) >> ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); >> ASSIGN_WRITE_MMIO_VFUNCS(fwtable); >> ASSIGN_READ_MMIO_VFUNCS(fwtable); >> + if (HAS_DECOUPLED_MMIO(dev_priv)) { >> + dev_priv->uncore.funcs.mmio_readl = >> + gen9_decoupled_read32; >> + dev_priv->uncore.funcs.mmio_readq = >> + gen9_decoupled_read64; >> + dev_priv->uncore.funcs.mmio_writel = >> + gen9_decoupled_write32; >> + } >> break; >> case 8: >> if (IS_CHERRYVIEW(dev_priv)) { >> > > With the rename fix above: > > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Thank you for your review. Will fix the name and re send. > > Regards, > > Tvrtko
On 15/11/2016 10:56, Praveen Paneri wrote: > Hi Tvrtko, > > On Tuesday 15 November 2016 03:06 PM, Tvrtko Ursulin wrote: >> >> On 15/11/2016 06:40, Praveen Paneri wrote: >>> Decoupled MMIO is an alternative way to access forcewake domain >>> registers, which requires less cycles for a single read/write and >>> avoids frequent software forcewake. >>> This certainly gives advantage over the forcewake as this new >>> mechanism “decouples” CPU cycles and allow them to complete even >>> when GT is in a CPD (frequency change) or C6 state. >>> >>> This can co-exist with forcewake and we will continue to use forcewake >>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >>> separately and land into funny situations. >>> >>> v2: >>> - Moved platform check out of the function and got rid of duplicate >>> functions to find out decoupled power domain (Chris) >>> - Added a check for forcewake already held and skipped decoupled >>> access (Chris) >>> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >>> >>> v3: >>> - Improved commit message with more info on decoupled mmio (Tvrtko) >>> - Changed decoupled operation to enum and used u32 instead of >>> uint_32 data type for register offset (Tvrtko) >>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >>> - Improved __gen9_decoupled_read and __gen9_decoupled_write >>> routines (Tvrtko) >>> >>> v4: >>> - Fixed alignment and variable names (Chris) >>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >>> >>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 18 +++++- >>> drivers/gpu/drm/i915/i915_pci.c | 1 + >>> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >>> drivers/gpu/drm/i915/intel_uncore.c | 109 >>> ++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 134 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>> b/drivers/gpu/drm/i915/i915_drv.h >>> index 4e7148a..158f05c 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -549,6 +549,18 @@ enum forcewake_domains { >>> #define FW_REG_READ (1) >>> #define FW_REG_WRITE (2) >>> >>> +enum decoupled_power_domain { >>> + GEN9_DECOUPLED_PD_BLITTER = 0, >>> + GEN9_DECOUPLED_PD_RENDER, >>> + GEN9_DECOUPLED_PD_MEDIA, >>> + GEN9_DECOUPLED_PD_ALL >>> +}; >>> + >>> +enum decoupled_ops { >>> + GEN9_DECOUPLED_OP_WRITE = 0, >>> + GEN9_DECOUPLED_OP_READ >>> +}; >>> + >>> enum forcewake_domains >>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >>> i915_reg_t reg, unsigned int op); >>> @@ -683,7 +695,8 @@ struct intel_csr { >>> func(cursor_needs_physical); \ >>> func(hws_needs_physical); \ >>> func(overlay_needs_physical); \ >>> - func(supports_tv) >>> + func(supports_tv); \ >>> + func(has_decoupled_mmio) >>> >>> struct sseu_dev_info { >>> u8 slice_mask; >>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { >>> #define GT_FREQUENCY_MULTIPLIER 50 >>> #define GEN9_FREQ_SCALER 3 >>> >>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ >>> + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) >>> + >> >> Looks like I've missed this before, but Would you mind renaming the dev >> argument to dev_priv? >> >>> #include "i915_trace.h" >>> >>> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private >>> *dev_priv) >>> diff --git a/drivers/gpu/drm/i915/i915_pci.c >>> b/drivers/gpu/drm/i915/i915_pci.c >>> index 70a99ce..fce8e19 100644 >>> --- a/drivers/gpu/drm/i915/i915_pci.c >>> +++ b/drivers/gpu/drm/i915/i915_pci.c >>> @@ -363,6 +363,7 @@ >>> .has_hw_contexts = 1, >>> .has_logical_ring_contexts = 1, >>> .has_guc = 1, >>> + .has_decoupled_mmio = 1, >>> .ddb_size = 512, >>> GEN_DEFAULT_PIPEOFFSETS, >>> IVB_CURSOR_OFFSETS, >>> diff --git a/drivers/gpu/drm/i915/i915_reg.h >>> b/drivers/gpu/drm/i915/i915_reg.h >>> index 3361d7f..c70c07a 100644 >>> --- a/drivers/gpu/drm/i915/i915_reg.h >>> +++ b/drivers/gpu/drm/i915/i915_reg.h >>> @@ -7342,6 +7342,13 @@ enum { >>> #define SKL_FUSE_PG1_DIST_STATUS (1<<26) >>> #define SKL_FUSE_PG2_DIST_STATUS (1<<25) >>> >>> +/* Decoupled MMIO register pair for kernel driver */ >>> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) >>> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) >>> +#define GEN9_DECOUPLED_DW1_GO (1<<31) >>> +#define GEN9_DECOUPLED_PD_SHIFT 28 >>> +#define GEN9_DECOUPLED_OP_SHIFT 24 >>> + >>> /* Per-pipe DDI Function Control */ >>> #define _TRANS_DDI_FUNC_CTL_A 0x60400 >>> #define _TRANS_DDI_FUNC_CTL_B 0x61400 >>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c >>> b/drivers/gpu/drm/i915/intel_uncore.c >>> index e2b188d..e4c50cb 100644 >>> --- a/drivers/gpu/drm/i915/intel_uncore.c >>> +++ b/drivers/gpu/drm/i915/intel_uncore.c >>> @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset) >>> __unclaimed_reg_debug(dev_priv, reg, read, before); >>> } >>> >>> +static const enum decoupled_power_domain fw2dpd_domain[] = { >>> + GEN9_DECOUPLED_PD_RENDER, >>> + GEN9_DECOUPLED_PD_BLITTER, >>> + GEN9_DECOUPLED_PD_ALL, >>> + GEN9_DECOUPLED_PD_MEDIA, >>> + GEN9_DECOUPLED_PD_ALL, >>> + GEN9_DECOUPLED_PD_ALL, >>> + GEN9_DECOUPLED_PD_ALL >>> +}; >>> + >>> +/* >>> + * Decoupled MMIO access for only 1 DWORD >>> + */ >>> +static void __gen9_decoupled_mmio_access(struct drm_i915_private >>> *dev_priv, >>> + u32 reg, >>> + enum forcewake_domains fw_domain, >>> + enum decoupled_ops operation) >>> +{ >>> + enum decoupled_power_domain dp_domain; >>> + u32 ctrl_reg_data = 0; >>> + >>> + dp_domain = fw2dpd_domain[fw_domain - 1]; >>> + >>> + ctrl_reg_data |= reg; >>> + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); >>> + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); >>> + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; >>> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, >>> ctrl_reg_data); >>> + >>> + if (wait_for_atomic((__raw_i915_read32(dev_priv, >>> + GEN9_DECOUPLED_REG0_DW1) & >>> + GEN9_DECOUPLED_DW1_GO) == 0, >>> + FORCEWAKE_ACK_TIMEOUT_MS)) >>> + DRM_ERROR("Decoupled MMIO wait timed out\n"); >>> +} >>> + >>> +static inline u32 >>> +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, >>> + u32 reg, >>> + enum forcewake_domains fw_domain) >>> +{ >>> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >>> + GEN9_DECOUPLED_OP_READ); >>> + >>> + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); >>> +} >>> + >>> +static inline void >>> +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, >>> + u32 reg, u32 data, >>> + enum forcewake_domains fw_domain) >>> +{ >>> + >>> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); >>> + >>> + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, >>> + GEN9_DECOUPLED_OP_WRITE); >>> +} >>> + >>> + >>> #define GEN2_READ_HEADER(x) \ >>> u##x val = 0; \ >>> assert_rpm_wakelock_held(dev_priv); >>> @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct >>> drm_i915_private *dev_priv, >>> GEN6_READ_FOOTER; \ >>> } >>> >>> +#define __gen9_decoupled_read(x) \ >>> +static u##x \ >>> +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ >>> + i915_reg_t reg, bool trace) { \ >>> + enum forcewake_domains fw_engine; \ >>> + GEN6_READ_HEADER(x); \ >>> + fw_engine = __fwtable_reg_read_fw_domains(offset); \ >>> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ >>> + unsigned i; \ >>> + u32 *ptr_data = (u32 *) &val; \ >>> + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ >>> + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ >>> + offset, \ >>> + fw_engine); \ >>> + } else { \ >>> + val = __raw_i915_read##x(dev_priv, reg); \ >>> + } \ >>> + GEN6_READ_FOOTER; \ >>> +} >>> + >>> +__gen9_decoupled_read(32) >>> +__gen9_decoupled_read(64) >>> __fwtable_read(8) >>> __fwtable_read(16) >>> __fwtable_read(32) >>> @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct >>> drm_i915_private *dev_priv, >>> GEN6_WRITE_FOOTER; \ >>> } >>> >>> +#define __gen9_decoupled_write(x) \ >>> +static void \ >>> +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ >>> + i915_reg_t reg, u##x val, \ >>> + bool trace) { \ >>> + enum forcewake_domains fw_engine; \ >>> + GEN6_WRITE_HEADER; \ >>> + fw_engine = __fwtable_reg_write_fw_domains(offset); \ >>> + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ >>> + __gen9_decoupled_mmio_write(dev_priv, \ >>> + offset, \ >>> + val, \ >>> + fw_engine); \ >>> + else \ >>> + __raw_i915_write##x(dev_priv, reg, val); \ >>> + GEN6_WRITE_FOOTER; \ >>> +} >>> + >>> +__gen9_decoupled_write(32) >>> __fwtable_write(8) >>> __fwtable_write(16) >>> __fwtable_write(32) >>> @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private >>> *dev_priv) >>> ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); >>> ASSIGN_WRITE_MMIO_VFUNCS(fwtable); >>> ASSIGN_READ_MMIO_VFUNCS(fwtable); >>> + if (HAS_DECOUPLED_MMIO(dev_priv)) { >>> + dev_priv->uncore.funcs.mmio_readl = >>> + gen9_decoupled_read32; >>> + dev_priv->uncore.funcs.mmio_readq = >>> + gen9_decoupled_read64; >>> + dev_priv->uncore.funcs.mmio_writel = >>> + gen9_decoupled_write32; >>> + } >>> break; >>> case 8: >>> if (IS_CHERRYVIEW(dev_priv)) { >>> >> >> With the rename fix above: >> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Thank you for your review. Will fix the name and re send. Chris also made a good suggestion since the HAS_DECOUPLED_MMIO is currently different from info->has_decoupled_mmio which is confusing. I think his idea was to edit info->has_decoupled_mmio in intel_device_runtime_init using the stepping data and remove the same from the macro. Regards, Tvrtko
Hi Chris, On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote: > On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote: >> >> On 15/11/2016 06:40, Praveen Paneri wrote: >>> Decoupled MMIO is an alternative way to access forcewake domain >>> registers, which requires less cycles for a single read/write and >>> avoids frequent software forcewake. >>> This certainly gives advantage over the forcewake as this new >>> mechanism “decouples” CPU cycles and allow them to complete even >>> when GT is in a CPD (frequency change) or C6 state. >>> >>> This can co-exist with forcewake and we will continue to use forcewake >>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >>> separately and land into funny situations. >>> >>> v2: >>> - Moved platform check out of the function and got rid of duplicate >>> functions to find out decoupled power domain (Chris) >>> - Added a check for forcewake already held and skipped decoupled >>> access (Chris) >>> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >>> >>> v3: >>> - Improved commit message with more info on decoupled mmio (Tvrtko) >>> - Changed decoupled operation to enum and used u32 instead of >>> uint_32 data type for register offset (Tvrtko) >>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >>> - Improved __gen9_decoupled_read and __gen9_decoupled_write >>> routines (Tvrtko) >>> >>> v4: >>> - Fixed alignment and variable names (Chris) >>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >>> >>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 18 +++++- >>> drivers/gpu/drm/i915/i915_pci.c | 1 + >>> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >>> drivers/gpu/drm/i915/intel_uncore.c | 109 ++++++++++++++++++++++++++++++++++++ >>> 4 files changed, 134 insertions(+), 1 deletion(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >>> index 4e7148a..158f05c 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -549,6 +549,18 @@ enum forcewake_domains { >>> #define FW_REG_READ (1) >>> #define FW_REG_WRITE (2) >>> >>> +enum decoupled_power_domain { >>> + GEN9_DECOUPLED_PD_BLITTER = 0, >>> + GEN9_DECOUPLED_PD_RENDER, >>> + GEN9_DECOUPLED_PD_MEDIA, >>> + GEN9_DECOUPLED_PD_ALL >>> +}; >>> + >>> +enum decoupled_ops { >>> + GEN9_DECOUPLED_OP_WRITE = 0, >>> + GEN9_DECOUPLED_OP_READ >>> +}; >>> + >>> enum forcewake_domains >>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >>> i915_reg_t reg, unsigned int op); >>> @@ -683,7 +695,8 @@ struct intel_csr { >>> func(cursor_needs_physical); \ >>> func(hws_needs_physical); \ >>> func(overlay_needs_physical); \ >>> - func(supports_tv) >>> + func(supports_tv); \ >>> + func(has_decoupled_mmio) >>> >>> struct sseu_dev_info { >>> u8 slice_mask; >>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { >>> #define GT_FREQUENCY_MULTIPLIER 50 >>> #define GEN9_FREQ_SCALER 3 >>> >>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ >>> + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) >>> + >> >> Looks like I've missed this before, but Would you mind renaming the >> dev argument to dev_priv? > > And whilst you are there, fix up the code to set > info->has_decoupled_mmio = false when santizing. > -Chris but dev_priv->info is declared as const. Praveen >
On 15/11/2016 13:17, Praveen Paneri wrote: > Hi Chris, > > On Tuesday 15 November 2016 03:37 PM, Chris Wilson wrote: >> On Tue, Nov 15, 2016 at 09:36:34AM +0000, Tvrtko Ursulin wrote: >>> >>> On 15/11/2016 06:40, Praveen Paneri wrote: >>>> Decoupled MMIO is an alternative way to access forcewake domain >>>> registers, which requires less cycles for a single read/write and >>>> avoids frequent software forcewake. >>>> This certainly gives advantage over the forcewake as this new >>>> mechanism “decouples” CPU cycles and allow them to complete even >>>> when GT is in a CPD (frequency change) or C6 state. >>>> >>>> This can co-exist with forcewake and we will continue to use forcewake >>>> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords >>>> separately and land into funny situations. >>>> >>>> v2: >>>> - Moved platform check out of the function and got rid of duplicate >>>> functions to find out decoupled power domain (Chris) >>>> - Added a check for forcewake already held and skipped decoupled >>>> access (Chris) >>>> - Skipped writing 64 bit registers through decoupled MMIO (Chris) >>>> >>>> v3: >>>> - Improved commit message with more info on decoupled mmio (Tvrtko) >>>> - Changed decoupled operation to enum and used u32 instead of >>>> uint_32 data type for register offset (Tvrtko) >>>> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko) >>>> - Added lookup table for converting fw_engine to pd_engine (Tvrtko) >>>> - Improved __gen9_decoupled_read and __gen9_decoupled_write >>>> routines (Tvrtko) >>>> >>>> v4: >>>> - Fixed alignment and variable names (Chris) >>>> - Write GEN9_DECOUPLED_REG0_DW1 register in just one go (Zhe Wang) >>>> >>>> Signed-off-by: Zhe Wang <zhe1.wang@intel.com> >>>> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com> >>>> --- >>>> drivers/gpu/drm/i915/i915_drv.h | 18 +++++- >>>> drivers/gpu/drm/i915/i915_pci.c | 1 + >>>> drivers/gpu/drm/i915/i915_reg.h | 7 +++ >>>> drivers/gpu/drm/i915/intel_uncore.c | 109 >>>> ++++++++++++++++++++++++++++++++++++ >>>> 4 files changed, 134 insertions(+), 1 deletion(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>>> b/drivers/gpu/drm/i915/i915_drv.h >>>> index 4e7148a..158f05c 100644 >>>> --- a/drivers/gpu/drm/i915/i915_drv.h >>>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>>> @@ -549,6 +549,18 @@ enum forcewake_domains { >>>> #define FW_REG_READ (1) >>>> #define FW_REG_WRITE (2) >>>> >>>> +enum decoupled_power_domain { >>>> + GEN9_DECOUPLED_PD_BLITTER = 0, >>>> + GEN9_DECOUPLED_PD_RENDER, >>>> + GEN9_DECOUPLED_PD_MEDIA, >>>> + GEN9_DECOUPLED_PD_ALL >>>> +}; >>>> + >>>> +enum decoupled_ops { >>>> + GEN9_DECOUPLED_OP_WRITE = 0, >>>> + GEN9_DECOUPLED_OP_READ >>>> +}; >>>> + >>>> enum forcewake_domains >>>> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, >>>> i915_reg_t reg, unsigned int op); >>>> @@ -683,7 +695,8 @@ struct intel_csr { >>>> func(cursor_needs_physical); \ >>>> func(hws_needs_physical); \ >>>> func(overlay_needs_physical); \ >>>> - func(supports_tv) >>>> + func(supports_tv); \ >>>> + func(has_decoupled_mmio) >>>> >>>> struct sseu_dev_info { >>>> u8 slice_mask; >>>> @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { >>>> #define GT_FREQUENCY_MULTIPLIER 50 >>>> #define GEN9_FREQ_SCALER 3 >>>> >>>> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ >>>> + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) >>>> + >>> >>> Looks like I've missed this before, but Would you mind renaming the >>> dev argument to dev_priv? >> >> And whilst you are there, fix up the code to set >> info->has_decoupled_mmio = false when santizing. >> -Chris > but dev_priv->info is declared as const. Look in intel_device_info_runtime_init for how it is currently done (mkwrite_device_info). Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4e7148a..158f05c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -549,6 +549,18 @@ enum forcewake_domains { #define FW_REG_READ (1) #define FW_REG_WRITE (2) +enum decoupled_power_domain { + GEN9_DECOUPLED_PD_BLITTER = 0, + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL +}; + +enum decoupled_ops { + GEN9_DECOUPLED_OP_WRITE = 0, + GEN9_DECOUPLED_OP_READ +}; + enum forcewake_domains intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, i915_reg_t reg, unsigned int op); @@ -683,7 +695,8 @@ struct intel_csr { func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ - func(supports_tv) + func(supports_tv); \ + func(has_decoupled_mmio) struct sseu_dev_info { u8 slice_mask; @@ -2652,6 +2665,9 @@ struct drm_i915_cmd_table { #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \ + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER)) + #include "i915_trace.h" static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 70a99ce..fce8e19 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -363,6 +363,7 @@ .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_guc = 1, + .has_decoupled_mmio = 1, .ddb_size = 512, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3361d7f..c70c07a 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7342,6 +7342,13 @@ enum { #define SKL_FUSE_PG1_DIST_STATUS (1<<26) #define SKL_FUSE_PG2_DIST_STATUS (1<<25) +/* Decoupled MMIO register pair for kernel driver */ +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) +#define GEN9_DECOUPLED_DW1_GO (1<<31) +#define GEN9_DECOUPLED_PD_SHIFT 28 +#define GEN9_DECOUPLED_OP_SHIFT 24 + /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e2b188d..e4c50cb 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -831,6 +831,66 @@ static bool is_gen8_shadowed(u32 offset) __unclaimed_reg_debug(dev_priv, reg, read, before); } +static const enum decoupled_power_domain fw2dpd_domain[] = { + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_BLITTER, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL +}; + +/* + * Decoupled MMIO access for only 1 DWORD + */ +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain, + enum decoupled_ops operation) +{ + enum decoupled_power_domain dp_domain; + u32 ctrl_reg_data = 0; + + dp_domain = fw2dpd_domain[fw_domain - 1]; + + ctrl_reg_data |= reg; + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + GEN9_DECOUPLED_REG0_DW1) & + GEN9_DECOUPLED_DW1_GO) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Decoupled MMIO wait timed out\n"); +} + +static inline u32 +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain) +{ + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_READ); + + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); +} + +static inline void +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, + u32 reg, u32 data, + enum forcewake_domains fw_domain) +{ + + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); + + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_WRITE); +} + + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(dev_priv); @@ -935,6 +995,28 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, GEN6_READ_FOOTER; \ } +#define __gen9_decoupled_read(x) \ +static u##x \ +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_READ_HEADER(x); \ + fw_engine = __fwtable_reg_read_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ + unsigned i; \ + u32 *ptr_data = (u32 *) &val; \ + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ + offset, \ + fw_engine); \ + } else { \ + val = __raw_i915_read##x(dev_priv, reg); \ + } \ + GEN6_READ_FOOTER; \ +} + +__gen9_decoupled_read(32) +__gen9_decoupled_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1064,6 +1146,25 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, GEN6_WRITE_FOOTER; \ } +#define __gen9_decoupled_write(x) \ +static void \ +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, u##x val, \ + bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_WRITE_HEADER; \ + fw_engine = __fwtable_reg_write_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ + __gen9_decoupled_mmio_write(dev_priv, \ + offset, \ + val, \ + fw_engine); \ + else \ + __raw_i915_write##x(dev_priv, reg, val); \ + GEN6_WRITE_FOOTER; \ +} + +__gen9_decoupled_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1287,6 +1388,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(fwtable); ASSIGN_READ_MMIO_VFUNCS(fwtable); + if (HAS_DECOUPLED_MMIO(dev_priv)) { + dev_priv->uncore.funcs.mmio_readl = + gen9_decoupled_read32; + dev_priv->uncore.funcs.mmio_readq = + gen9_decoupled_read64; + dev_priv->uncore.funcs.mmio_writel = + gen9_decoupled_write32; + } break; case 8: if (IS_CHERRYVIEW(dev_priv)) {