diff mbox

drm/i915/bdw: Check for slice, subslice and EU count for BDW

Message ID 1439469528-1016-1-git-send-email-lukasz.daniluk@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Łukasz Daniluk Aug. 13, 2015, 12:38 p.m. UTC
Added checks for available slices, subslices and EUs for Broadwell. This
information is filled in intel_device_info and is available to user with
GET_PARAM.
Added checks for enabled slices, subslices and EU for Broadwell. This
information is based on available counts but takes power gated slices
into account. It can be read in debugfs.
Introduce new register defines that contain information on slices on
Broadwell.

Cc: Jeff Mcgee <jeff.mcgee@intel.com>
Signed-off-by: Łukasz Daniluk <lukasz.daniluk@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c | 35 +++++++++++++--
 drivers/gpu/drm/i915/i915_dma.c     | 89 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h     | 19 +++++++-
 3 files changed, 138 insertions(+), 5 deletions(-)

Comments

Bish, Jim Aug. 13, 2015, 9:34 p.m. UTC | #1
On Thu, 2015-08-13 at 14:38 +0200, Łukasz Daniluk wrote:
> Added checks for available slices, subslices and EUs for Broadwell. 

> This

> information is filled in intel_device_info and is available to user 

> with

> GET_PARAM.

> Added checks for enabled slices, subslices and EU for Broadwell. This

> information is based on available counts but takes power gated slices

> into account. It can be read in debugfs.

> Introduce new register defines that contain information on slices on

> Broadwell.

> 

> Cc: Jeff Mcgee <jeff.mcgee@intel.com>

> Signed-off-by: Łukasz Daniluk <lukasz.daniluk@intel.com>

> ---

>  drivers/gpu/drm/i915/i915_debugfs.c | 35 +++++++++++++--

>  drivers/gpu/drm/i915/i915_dma.c     | 89 

> +++++++++++++++++++++++++++++++++++++

>  drivers/gpu/drm/i915/i915_reg.h     | 19 +++++++-

>  3 files changed, 138 insertions(+), 5 deletions(-)

> 

> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 

> b/drivers/gpu/drm/i915/i915_debugfs.c

> index 23a69307..a17f912 100644

> --- a/drivers/gpu/drm/i915/i915_debugfs.c

> +++ b/drivers/gpu/drm/i915/i915_debugfs.c

> @@ -4838,7 +4838,6 @@ struct sseu_dev_status {

>  static void cherryview_sseu_device_status(struct drm_device *dev,

>                                         struct sseu_dev_status *stat)

>  {

> -     struct drm_i915_private *dev_priv = dev->dev_private;

>       const int ss_max = 2;

>       int ss;

>       u32 sig1[ss_max], sig2[ss_max];

> @@ -4870,7 +4869,6 @@ static void 

> cherryview_sseu_device_status(struct drm_device *dev,

>  static void gen9_sseu_device_status(struct drm_device *dev,

>                                   struct sseu_dev_status *stat)

>  {

> -     struct drm_i915_private *dev_priv = dev->dev_private;

>       int s_max = 3, ss_max = 4;

>       int s, ss;

>       u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];

> @@ -4932,13 +4930,42 @@ static void gen9_sseu_device_status(struct 

> drm_device *dev,

>       }

>  }

>  

> +static void broadwell_sseu_device_status(struct drm_device *dev,

> +                                      struct sseu_dev_status *stat)

> +{

> +     int s;

> +     u32 slice_info = I915_READ(GEN8_R_PWR_CLK_STATE);

> +

> +     /*

> +      * If first bit of slice_info is 0, there is no specific power

> +      * state set. Otherwise we read the count of enabled slices

> +      * from it.

> +      */

> +     if (slice_info & (1<<31))

> +             stat->slice_total = (slice_info & GEN8_RPCS_S_CNT_MASK)

> +                     >> GEN8_RPCS_S_CNT_SHIFT;

> +     else

> +             stat->slice_total = INTEL_INFO(dev)->slice_total;

> +

> +     stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice;

> +     stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice;

> +     stat->subslice_total = stat->slice_total * stat-

> >subslice_per_slice;

> +     stat->eu_total = stat->eu_per_subslice * stat->subslice_total;

> +

> +     /* subtract fused off EU(s) from enabled slice(s) */

> +     for (s = 0; s < stat.slice_total; s++) {

> +             u8 subslice_7eu = INTEL_INFO(dev)->subslice_7eu[s];

> +             stat->eu_total -= hweight8(subslice_7eu);

> +     }

> +}

> +

>  static int i915_sseu_status(struct seq_file *m, void *unused)

>  {

>       struct drm_info_node *node = (struct drm_info_node *) m->private;

>       struct drm_device *dev = node->minor->dev;

>       struct sseu_dev_status stat;

>  

> -     if ((INTEL_INFO(dev)->gen < 8) || IS_BROADWELL(dev))

> +     if ((INTEL_INFO(dev)->gen < 8))

>               return -ENODEV;

>  

>       seq_puts(m, "SSEU Device Info\n");

> @@ -4963,6 +4990,8 @@ static int i915_sseu_status(struct seq_file 

> *m, void *unused)

>       memset(&stat, 0, sizeof(stat));

>       if (IS_CHERRYVIEW(dev)) {

>               cherryview_sseu_device_status(dev, &stat);

> +     } else if (IS_BROADWELL(dev)) {

> +             broadwell_sseu_device_status(dev, &stat);

>       } else if (INTEL_INFO(dev)->gen >= 9) {

>               gen9_sseu_device_status(dev, &stat);

>       }

> diff --git a/drivers/gpu/drm/i915/i915_dma.c 

> b/drivers/gpu/drm/i915/i915_dma.c

> index ab37d11..2d52b1e 100644

> --- a/drivers/gpu/drm/i915/i915_dma.c

> +++ b/drivers/gpu/drm/i915/i915_dma.c

> @@ -705,6 +705,93 @@ static void gen9_sseu_info_init(struct 

> drm_device *dev)

>       info->has_eu_pg = (info->eu_per_subslice > 2);

>  }

>  

> +static void broadwell_sseu_info_init(struct drm_device *dev)

> +{

> +     struct drm_i915_private *dev_priv = dev->dev_private;

> +     struct intel_device_info *info;

> +     const int s_max = 3, ss_max = 3, eu_max = 8;

> +     int s, ss;

> +     u32 fuse2, eu_disable[s_max], s_enable, ss_disable;

> +

> +     fuse2 = I915_READ(GEN8_FUSE2);

> +     s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>

> +             GEN8_F2_S_ENA_SHIFT;

> +     ss_disable = (fuse2 & GEN8_F2_SS_DIS_MASK) >>

> +             GEN8_F2_SS_DIS_SHIFT;

> +

> +     eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) &

> +             GEN8_EU_DIS0_S0_MASK;

> +     eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >>

> +                     GEN8_EU_DIS0_S1_SHIFT) |

> +             ((I915_READ(GEN8_EU_DISABLE1) &

> +               GEN8_EU_DIS1_S1_MASK) <<

> +              (32 - GEN8_EU_DIS0_S1_SHIFT));

> +     eu_disable[2] = (I915_READ(GEN8_EU_DISABLE1) >>

> +                     GEN8_EU_DIS1_S2_SHIFT) |

> +             ((I915_READ(GEN8_EU_DISABLE2) &

> +               GEN8_EU_DIS2_S2_MASK) <<

> +              (32 - GEN8_EU_DIS1_S2_SHIFT));

> +

> +

> +     info = (struct intel_device_info *)&dev_priv->info;

> +     info->slice_total = hweight32(s_enable);

> +

> +     /*

> +      * The subslice disable field is global, i.e. it applies

> +      * to each of the enabled slices.

> +      */

> +     info->subslice_per_slice = ss_max - hweight32(ss_disable);

> +     info->subslice_total = info->slice_total *

> +             info->subslice_per_slice;

> +

> +     /*

> +      * Iterate through enabled slices and subslices to

> +      * count the total enabled EU.

> +      */

> +     for (s = 0; s < s_max; s++) {

> +             if (!(s_enable & (0x1 << s)))

> +                     /* skip disabled slice */

> +                     continue;

> +

> +             for (ss = 0; ss < ss_max; ss++) {

> +                     u32 n_disabled;

> +

> +                     if (ss_disable & (0x1 << ss))

> +                             /* skip disabled subslice */

> +                             continue;

> +

> +                     n_disabled = hweight8(eu_disable[s] >>

> +                                     (ss * eu_max));

> +

> +                     /*

> +                      * Record which subslice(s) has(have) 7 EUs. we

> +                      * can tune the hash used to spread work among

> +                      * subslices if they are unbalanced.

> +                      */

> +                     if (eu_max - n_disabled == 7)

> +                             info->subslice_7eu[s] |= 1 << ss;

> +

> +                     info->eu_total += eu_max - n_disabled;

> +             }

> +     }

> +

> +     /*

> +      * BDW is expected to always have a uniform distribution of EU 

> across

> +      * subslices with the exception that any one EU in any one 

> subslice may

> +      * be fused off for die recovery.

I like the comments, but I am wondering whether "fused off for die recovery"
is OK. Would it be better to say it is for SKU options or something?
> +      */

> +     info->eu_per_subslice = info->subslice_total ?

> +             DIV_ROUND_UP(info->eu_total, info->subslice_total) : 0;

> +

> +     /*

> +      * BDW supports slice power gating on devices with more than

> +      * one slice.

> +      */

> +     info->has_slice_pg = (info->slice_total > 1);

> +     info->has_subslice_pg = 0;

> +     info->has_eu_pg = 0;

> +}

> +

>  /*

>   * Determine various intel_device_info fields at runtime.

>   *

> @@ -775,6 +862,8 @@ static void 

> intel_device_info_runtime_init(struct drm_device *dev)

>       /* Initialize slice/subslice/EU info */

>       if (IS_CHERRYVIEW(dev))

>               cherryview_sseu_info_init(dev);

> +     else if (IS_BROADWELL(dev))

> +             broadwell_sseu_info_init(dev);

>       else if (INTEL_INFO(dev)->gen >= 9)

>               gen9_sseu_info_init(dev);

>  

> diff --git a/drivers/gpu/drm/i915/i915_reg.h 

> b/drivers/gpu/drm/i915/i915_reg.h

> index be87e3b..77e043e 100644

> --- a/drivers/gpu/drm/i915/i915_reg.h

> +++ b/drivers/gpu/drm/i915/i915_reg.h

> @@ -1841,11 +1841,26 @@ enum skl_disp_power_wells {

>  #define   CHV_FGT_EU_DIS_SS1_R1_MASK (0xf << 

> CHV_FGT_EU_DIS_SS1_R1_SHIFT)

>  

>  #define GEN8_FUSE2                   0x9120

> +#define   GEN8_F2_SS_DIS_SHIFT               21

> +#define   GEN8_F2_SS_DIS_MASK                (0x7 << GEN8_F2_SS_DIS_SHIFT)

>  #define   GEN8_F2_S_ENA_SHIFT                25

>  #define   GEN8_F2_S_ENA_MASK         (0x7 << GEN8_F2_S_ENA_SHIFT)

>  

> -#define   GEN9_F2_SS_DIS_SHIFT               20

> -#define   GEN9_F2_SS_DIS_MASK                (0xf << GEN9_F2_SS_DIS_SHIFT)

> +#define GEN8_EU_DISABLE0             0x9134

> +#define   GEN8_EU_DIS0_S0_MASK               0xffffff

> +#define   GEN8_EU_DIS0_S1_SHIFT              24

> +#define   GEN8_EU_DIS0_S1_MASK               (0xff << GEN8_EU_DIS0_S1_SHIFT)

> +

> +#define GEN8_EU_DISABLE1             0x9138

> +#define   GEN8_EU_DIS1_S1_MASK               0xffff

> +#define   GEN8_EU_DIS1_S2_SHIFT              16

> +#define   GEN8_EU_DIS1_S2_MASK               (0xffff << GEN8_EU_DIS1_S2_SHIFT)

> +

> +#define GEN8_EU_DISABLE2             0x913c

> +#define   GEN8_EU_DIS2_S2_MASK               0xff

> +

> +#define GEN9_F2_SS_DIS_SHIFT         20

> +#define GEN9_F2_SS_DIS_MASK          (0xf << GEN9_F2_SS_DIS_SHIFT)

>  

>  #define GEN9_EU_DISABLE(slice)               (0x9134 + (slice)*0x4)

>
jeff.mcgee@intel.com Aug. 14, 2015, 12:11 a.m. UTC | #2
On Thu, Aug 13, 2015 at 02:38:48PM +0200, Łukasz Daniluk wrote:
> Added checks for available slices, subslices and EUs for Broadwell. This
> information is filled in intel_device_info and is available to user with
> GET_PARAM.
> Added checks for enabled slices, subslices and EU for Broadwell. This
> information is based on available counts but takes power gated slices
> into account. It can be read in debugfs.
> Introduce new register defines that contain information on slices on
> Broadwell.
> 
> Cc: Jeff Mcgee <jeff.mcgee@intel.com>
> Signed-off-by: Łukasz Daniluk <lukasz.daniluk@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 35 +++++++++++++--
>  drivers/gpu/drm/i915/i915_dma.c     | 89 +++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_reg.h     | 19 +++++++-
>  3 files changed, 138 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 23a69307..a17f912 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4838,7 +4838,6 @@ struct sseu_dev_status {
>  static void cherryview_sseu_device_status(struct drm_device *dev,
>  					  struct sseu_dev_status *stat)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
Did you compile this? I915_READ macro requires dev_priv.

>  	const int ss_max = 2;
>  	int ss;
>  	u32 sig1[ss_max], sig2[ss_max];
> @@ -4870,7 +4869,6 @@ static void cherryview_sseu_device_status(struct drm_device *dev,
>  static void gen9_sseu_device_status(struct drm_device *dev,
>  				    struct sseu_dev_status *stat)
>  {
> -	struct drm_i915_private *dev_priv = dev->dev_private;
Did you compile this? I915_READ macro requires dev_priv.
>  	int s_max = 3, ss_max = 4;
>  	int s, ss;
>  	u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
> @@ -4932,13 +4930,42 @@ static void gen9_sseu_device_status(struct drm_device *dev,
>  	}
>  }
>  
> +static void broadwell_sseu_device_status(struct drm_device *dev,
> +					 struct sseu_dev_status *stat)
> +{
> +	int s;
> +	u32 slice_info = I915_READ(GEN8_R_PWR_CLK_STATE);
RPCS register only shows what slice state was requested, not the actual
slice state. You need to use the GEN8_GT_SLICE_INFO register that I
shared with you in my original patch for this.
> +
> +	/*
> +	 * If first bit of slice_info is 0, there is no specific power
> +	 * state set. Otherwise we read the count of enabled slices
> +	 * from it.
> +	 */
> +	if (slice_info & (1<<31))
> +		stat->slice_total = (slice_info & GEN8_RPCS_S_CNT_MASK)
> +			>> GEN8_RPCS_S_CNT_SHIFT;
> +	else
> +		stat->slice_total = INTEL_INFO(dev)->slice_total;
> +
> +	stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice;
> +	stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice;
> +	stat->subslice_total = stat->slice_total * stat->subslice_per_slice;
> +	stat->eu_total = stat->eu_per_subslice * stat->subslice_total;
> +
> +	/* subtract fused off EU(s) from enabled slice(s) */
> +	for (s = 0; s < stat.slice_total; s++) {
> +		u8 subslice_7eu = INTEL_INFO(dev)->subslice_7eu[s];
> +		stat->eu_total -= hweight8(subslice_7eu);
> +	}
> +}
> +
>  static int i915_sseu_status(struct seq_file *m, void *unused)
>  {
>  	struct drm_info_node *node = (struct drm_info_node *) m->private;
>  	struct drm_device *dev = node->minor->dev;
>  	struct sseu_dev_status stat;
>  
> -	if ((INTEL_INFO(dev)->gen < 8) || IS_BROADWELL(dev))
> +	if ((INTEL_INFO(dev)->gen < 8))
>  		return -ENODEV;
>  
>  	seq_puts(m, "SSEU Device Info\n");
> @@ -4963,6 +4990,8 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
>  	memset(&stat, 0, sizeof(stat));
>  	if (IS_CHERRYVIEW(dev)) {
>  		cherryview_sseu_device_status(dev, &stat);
> +	} else if (IS_BROADWELL(dev)) {
> +		broadwell_sseu_device_status(dev, &stat);
>  	} else if (INTEL_INFO(dev)->gen >= 9) {
>  		gen9_sseu_device_status(dev, &stat);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ab37d11..2d52b1e 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -705,6 +705,93 @@ static void gen9_sseu_info_init(struct drm_device *dev)
>  	info->has_eu_pg = (info->eu_per_subslice > 2);
>  }
>  
> +static void broadwell_sseu_info_init(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_device_info *info;
> +	const int s_max = 3, ss_max = 3, eu_max = 8;
> +	int s, ss;
> +	u32 fuse2, eu_disable[s_max], s_enable, ss_disable;
> +
> +	fuse2 = I915_READ(GEN8_FUSE2);
> +	s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>
> +		GEN8_F2_S_ENA_SHIFT;
> +	ss_disable = (fuse2 & GEN8_F2_SS_DIS_MASK) >>
> +		GEN8_F2_SS_DIS_SHIFT;
> +
> +	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) &
> +		GEN8_EU_DIS0_S0_MASK;
> +	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >>
> +			GEN8_EU_DIS0_S1_SHIFT) |
> +		((I915_READ(GEN8_EU_DISABLE1) &
> +		  GEN8_EU_DIS1_S1_MASK) <<
> +		 (32 - GEN8_EU_DIS0_S1_SHIFT));
> +	eu_disable[2] = (I915_READ(GEN8_EU_DISABLE1) >>
> +			GEN8_EU_DIS1_S2_SHIFT) |
> +		((I915_READ(GEN8_EU_DISABLE2) &
> +		  GEN8_EU_DIS2_S2_MASK) <<
> +		 (32 - GEN8_EU_DIS1_S2_SHIFT));
> +
> +
> +	info = (struct intel_device_info *)&dev_priv->info;
> +	info->slice_total = hweight32(s_enable);
> +
> +	/*
> +	 * The subslice disable field is global, i.e. it applies
> +	 * to each of the enabled slices.
> +	 */
> +	info->subslice_per_slice = ss_max - hweight32(ss_disable);
> +	info->subslice_total = info->slice_total *
> +		info->subslice_per_slice;
> +
> +	/*
> +	 * Iterate through enabled slices and subslices to
> +	 * count the total enabled EU.
> +	 */
> +	for (s = 0; s < s_max; s++) {
> +		if (!(s_enable & (0x1 << s)))
> +			/* skip disabled slice */
> +			continue;
> +
> +		for (ss = 0; ss < ss_max; ss++) {
> +			u32 n_disabled;
> +
> +			if (ss_disable & (0x1 << ss))
> +				/* skip disabled subslice */
> +				continue;
> +
> +			n_disabled = hweight8(eu_disable[s] >>
> +					(ss * eu_max));
> +
> +			/*
> +			 * Record which subslice(s) has(have) 7 EUs. we
> +			 * can tune the hash used to spread work among
> +			 * subslices if they are unbalanced.
> +			 */
> +			if (eu_max - n_disabled == 7)
> +				info->subslice_7eu[s] |= 1 << ss;
> +
> +			info->eu_total += eu_max - n_disabled;
> +		}
> +	}
> +
> +	/*
> +	 * BDW is expected to always have a uniform distribution of EU across
> +	 * subslices with the exception that any one EU in any one subslice may
> +	 * be fused off for die recovery.
> +	 */
> +	info->eu_per_subslice = info->subslice_total ?
> +		DIV_ROUND_UP(info->eu_total, info->subslice_total) : 0;
> +
> +	/*
> +	 * BDW supports slice power gating on devices with more than
> +	 * one slice.
> +	 */
> +	info->has_slice_pg = (info->slice_total > 1);
> +	info->has_subslice_pg = 0;
> +	info->has_eu_pg = 0;
> +}
> +
>  /*
>   * Determine various intel_device_info fields at runtime.
>   *
> @@ -775,6 +862,8 @@ static void intel_device_info_runtime_init(struct drm_device *dev)
>  	/* Initialize slice/subslice/EU info */
>  	if (IS_CHERRYVIEW(dev))
>  		cherryview_sseu_info_init(dev);
> +	else if (IS_BROADWELL(dev))
> +		broadwell_sseu_info_init(dev);
>  	else if (INTEL_INFO(dev)->gen >= 9)
>  		gen9_sseu_info_init(dev);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index be87e3b..77e043e 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1841,11 +1841,26 @@ enum skl_disp_power_wells {
>  #define   CHV_FGT_EU_DIS_SS1_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
>  
>  #define GEN8_FUSE2			0x9120
> +#define   GEN8_F2_SS_DIS_SHIFT		21
> +#define   GEN8_F2_SS_DIS_MASK		(0x7 << GEN8_F2_SS_DIS_SHIFT)
>  #define   GEN8_F2_S_ENA_SHIFT		25
>  #define   GEN8_F2_S_ENA_MASK		(0x7 << GEN8_F2_S_ENA_SHIFT)
>  
> -#define   GEN9_F2_SS_DIS_SHIFT		20
> -#define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
> +#define GEN8_EU_DISABLE0		0x9134
> +#define   GEN8_EU_DIS0_S0_MASK		0xffffff
> +#define   GEN8_EU_DIS0_S1_SHIFT		24
> +#define   GEN8_EU_DIS0_S1_MASK		(0xff << GEN8_EU_DIS0_S1_SHIFT)
> +
> +#define GEN8_EU_DISABLE1		0x9138
> +#define   GEN8_EU_DIS1_S1_MASK		0xffff
> +#define   GEN8_EU_DIS1_S2_SHIFT		16
> +#define   GEN8_EU_DIS1_S2_MASK		(0xffff << GEN8_EU_DIS1_S2_SHIFT)
> +
> +#define GEN8_EU_DISABLE2		0x913c
> +#define   GEN8_EU_DIS2_S2_MASK		0xff
> +
> +#define GEN9_F2_SS_DIS_SHIFT		20
> +#define GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
>  
>  #define GEN9_EU_DISABLE(slice)		(0x9134 + (slice)*0x4)
>  
> -- 
> Lukasz Daniluk
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 23a69307..a17f912 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4838,7 +4838,6 @@  struct sseu_dev_status {
 static void cherryview_sseu_device_status(struct drm_device *dev,
 					  struct sseu_dev_status *stat)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	const int ss_max = 2;
 	int ss;
 	u32 sig1[ss_max], sig2[ss_max];
@@ -4870,7 +4869,6 @@  static void cherryview_sseu_device_status(struct drm_device *dev,
 static void gen9_sseu_device_status(struct drm_device *dev,
 				    struct sseu_dev_status *stat)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int s_max = 3, ss_max = 4;
 	int s, ss;
 	u32 s_reg[s_max], eu_reg[2*s_max], eu_mask[2];
@@ -4932,13 +4930,42 @@  static void gen9_sseu_device_status(struct drm_device *dev,
 	}
 }
 
+static void broadwell_sseu_device_status(struct drm_device *dev,
+					 struct sseu_dev_status *stat)
+{
+	int s;
+	u32 slice_info = I915_READ(GEN8_R_PWR_CLK_STATE);
+
+	/*
+	 * If first bit of slice_info is 0, there is no specific power
+	 * state set. Otherwise we read the count of enabled slices
+	 * from it.
+	 */
+	if (slice_info & (1<<31))
+		stat->slice_total = (slice_info & GEN8_RPCS_S_CNT_MASK)
+			>> GEN8_RPCS_S_CNT_SHIFT;
+	else
+		stat->slice_total = INTEL_INFO(dev)->slice_total;
+
+	stat->subslice_per_slice = INTEL_INFO(dev)->subslice_per_slice;
+	stat->eu_per_subslice = INTEL_INFO(dev)->eu_per_subslice;
+	stat->subslice_total = stat->slice_total * stat->subslice_per_slice;
+	stat->eu_total = stat->eu_per_subslice * stat->subslice_total;
+
+	/* subtract fused off EU(s) from enabled slice(s) */
+	for (s = 0; s < stat.slice_total; s++) {
+		u8 subslice_7eu = INTEL_INFO(dev)->subslice_7eu[s];
+		stat->eu_total -= hweight8(subslice_7eu);
+	}
+}
+
 static int i915_sseu_status(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	struct sseu_dev_status stat;
 
-	if ((INTEL_INFO(dev)->gen < 8) || IS_BROADWELL(dev))
+	if ((INTEL_INFO(dev)->gen < 8))
 		return -ENODEV;
 
 	seq_puts(m, "SSEU Device Info\n");
@@ -4963,6 +4990,8 @@  static int i915_sseu_status(struct seq_file *m, void *unused)
 	memset(&stat, 0, sizeof(stat));
 	if (IS_CHERRYVIEW(dev)) {
 		cherryview_sseu_device_status(dev, &stat);
+	} else if (IS_BROADWELL(dev)) {
+		broadwell_sseu_device_status(dev, &stat);
 	} else if (INTEL_INFO(dev)->gen >= 9) {
 		gen9_sseu_device_status(dev, &stat);
 	}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ab37d11..2d52b1e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -705,6 +705,93 @@  static void gen9_sseu_info_init(struct drm_device *dev)
 	info->has_eu_pg = (info->eu_per_subslice > 2);
 }
 
+static void broadwell_sseu_info_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_device_info *info;
+	const int s_max = 3, ss_max = 3, eu_max = 8;
+	int s, ss;
+	u32 fuse2, eu_disable[s_max], s_enable, ss_disable;
+
+	fuse2 = I915_READ(GEN8_FUSE2);
+	s_enable = (fuse2 & GEN8_F2_S_ENA_MASK) >>
+		GEN8_F2_S_ENA_SHIFT;
+	ss_disable = (fuse2 & GEN8_F2_SS_DIS_MASK) >>
+		GEN8_F2_SS_DIS_SHIFT;
+
+	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) &
+		GEN8_EU_DIS0_S0_MASK;
+	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >>
+			GEN8_EU_DIS0_S1_SHIFT) |
+		((I915_READ(GEN8_EU_DISABLE1) &
+		  GEN8_EU_DIS1_S1_MASK) <<
+		 (32 - GEN8_EU_DIS0_S1_SHIFT));
+	eu_disable[2] = (I915_READ(GEN8_EU_DISABLE1) >>
+			GEN8_EU_DIS1_S2_SHIFT) |
+		((I915_READ(GEN8_EU_DISABLE2) &
+		  GEN8_EU_DIS2_S2_MASK) <<
+		 (32 - GEN8_EU_DIS1_S2_SHIFT));
+
+
+	info = (struct intel_device_info *)&dev_priv->info;
+	info->slice_total = hweight32(s_enable);
+
+	/*
+	 * The subslice disable field is global, i.e. it applies
+	 * to each of the enabled slices.
+	 */
+	info->subslice_per_slice = ss_max - hweight32(ss_disable);
+	info->subslice_total = info->slice_total *
+		info->subslice_per_slice;
+
+	/*
+	 * Iterate through enabled slices and subslices to
+	 * count the total enabled EU.
+	 */
+	for (s = 0; s < s_max; s++) {
+		if (!(s_enable & (0x1 << s)))
+			/* skip disabled slice */
+			continue;
+
+		for (ss = 0; ss < ss_max; ss++) {
+			u32 n_disabled;
+
+			if (ss_disable & (0x1 << ss))
+				/* skip disabled subslice */
+				continue;
+
+			n_disabled = hweight8(eu_disable[s] >>
+					(ss * eu_max));
+
+			/*
+			 * Record which subslice(s) has(have) 7 EUs. we
+			 * can tune the hash used to spread work among
+			 * subslices if they are unbalanced.
+			 */
+			if (eu_max - n_disabled == 7)
+				info->subslice_7eu[s] |= 1 << ss;
+
+			info->eu_total += eu_max - n_disabled;
+		}
+	}
+
+	/*
+	 * BDW is expected to always have a uniform distribution of EU across
+	 * subslices with the exception that any one EU in any one subslice may
+	 * be fused off for die recovery.
+	 */
+	info->eu_per_subslice = info->subslice_total ?
+		DIV_ROUND_UP(info->eu_total, info->subslice_total) : 0;
+
+	/*
+	 * BDW supports slice power gating on devices with more than
+	 * one slice.
+	 */
+	info->has_slice_pg = (info->slice_total > 1);
+	info->has_subslice_pg = 0;
+	info->has_eu_pg = 0;
+}
+
 /*
  * Determine various intel_device_info fields at runtime.
  *
@@ -775,6 +862,8 @@  static void intel_device_info_runtime_init(struct drm_device *dev)
 	/* Initialize slice/subslice/EU info */
 	if (IS_CHERRYVIEW(dev))
 		cherryview_sseu_info_init(dev);
+	else if (IS_BROADWELL(dev))
+		broadwell_sseu_info_init(dev);
 	else if (INTEL_INFO(dev)->gen >= 9)
 		gen9_sseu_info_init(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index be87e3b..77e043e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1841,11 +1841,26 @@  enum skl_disp_power_wells {
 #define   CHV_FGT_EU_DIS_SS1_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
 
 #define GEN8_FUSE2			0x9120
+#define   GEN8_F2_SS_DIS_SHIFT		21
+#define   GEN8_F2_SS_DIS_MASK		(0x7 << GEN8_F2_SS_DIS_SHIFT)
 #define   GEN8_F2_S_ENA_SHIFT		25
 #define   GEN8_F2_S_ENA_MASK		(0x7 << GEN8_F2_S_ENA_SHIFT)
 
-#define   GEN9_F2_SS_DIS_SHIFT		20
-#define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
+#define GEN8_EU_DISABLE0		0x9134
+#define   GEN8_EU_DIS0_S0_MASK		0xffffff
+#define   GEN8_EU_DIS0_S1_SHIFT		24
+#define   GEN8_EU_DIS0_S1_MASK		(0xff << GEN8_EU_DIS0_S1_SHIFT)
+
+#define GEN8_EU_DISABLE1		0x9138
+#define   GEN8_EU_DIS1_S1_MASK		0xffff
+#define   GEN8_EU_DIS1_S2_SHIFT		16
+#define   GEN8_EU_DIS1_S2_MASK		(0xffff << GEN8_EU_DIS1_S2_SHIFT)
+
+#define GEN8_EU_DISABLE2		0x913c
+#define   GEN8_EU_DIS2_S2_MASK		0xff
+
+#define GEN9_F2_SS_DIS_SHIFT		20
+#define GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
 
 #define GEN9_EU_DISABLE(slice)		(0x9134 + (slice)*0x4)