diff mbox

[v2,6/9] drm/i915: expose command stream timestamp frequency to userspace

Message ID 20171102162949.22221-7-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin Nov. 2, 2017, 4:29 p.m. UTC
We used to have this fixed per generation, but starting with CNL, userspace
cannot tell the frequency just from the PCI ID. Let's make this information
available. This is particularly useful for performance monitoring, where much
of the normalization work is done using those timestamps (this includes
pipeline statistics in both GL & Vulkan as well as OA reports).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
 drivers/gpu/drm/i915/i915_drv.c          |  3 +
 drivers/gpu/drm/i915/i915_drv.h          |  2 +
 drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
 drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h              |  6 ++
 6 files changed, 133 insertions(+)

Comments

Rafael Antognolli Nov. 7, 2017, 12:01 a.m. UTC | #1
This patch, along with the respective ones for Mesa, does fix the gl
timestamp query piglit failures on CNL. So it is

Tested-by: Rafael Antognolli <rafael.antognolli@intel.com>

On Thu, Nov 02, 2017 at 04:29:46PM +0000, Lionel Landwerlin wrote:
> We use to have this fixed per generation, but starting with CNL userspace
> cannot tell just off the PCI ID. Let's make this information available. This
> is particularly useful for performance monitoring where much of the
> normalization work is done using those timestamps (this include pipeline
> statistics in both GL & Vulkan as well as OA reports).
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>  drivers/gpu/drm/i915/i915_drv.c          |  3 +
>  drivers/gpu/drm/i915/i915_drv.h          |  2 +
>  drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>  drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h              |  6 ++
>  6 files changed, 133 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..0897fd616a1f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  		   yesno(dev_priv->gt.awake));
>  	seq_printf(m, "Global active requests: %d\n",
>  		   dev_priv->gt.active_requests);
> +	seq_printf(m, "CS timestamp frequency: %llu\n",
> +		   dev_priv->info.cs_timestamp_frequency);
>  
>  	p = drm_seq_file_printer(m);
>  	for_each_engine(engine, dev_priv, id)
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e7e9e061073b..fdd23e79fb46 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  		if (!value)
>  			return -ENODEV;
>  		break;
> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
> +		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6cb7cd7f9420..4e804aaeaae1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -886,6 +886,8 @@ struct intel_device_info {
>  	/* Slice/subslice/EU info */
>  	struct sseu_dev_info sseu;
>  
> +	uint64_t cs_timestamp_frequency;
> +
>  	struct color_luts {
>  		u16 degamma_lut_size;
>  		u16 gamma_lut_size;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a2223f01ee2a..f392f28f2cfa 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1119,9 +1119,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>  
>  /* RPM unit config (Gen8+) */
>  #define RPM_CONFIG0	    _MMIO(0x0D00)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
> +
>  #define RPM_CONFIG1	    _MMIO(0x0D04)
>  #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>  
> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
> +#define GEN8_CTC_MODE			_MMIO(0xA26C)
> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
> +
>  /* RPC unit config (Gen8+) */
>  #define RPC_CONFIG	    _MMIO(0x0D08)
>  
> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>  #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>  #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>  
> +#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
> +
>  #define _PIPE_FRMTMSTMP_A		0x70048
>  #define PIPE_FRMTMSTMP(pipe)		\
>  			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index db03d179fc85..9b71a9b6d80e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>  	sseu->has_eu_pg = 0;
>  }
>  
> +static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
> +{
> +	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
> +	u64 base_freq, frac_freq;
> +
> +	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
> +	base_freq *= 1000000;
> +
> +	frac_freq = ((ts_override &
> +		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
> +	if (frac_freq != 0)
> +		frac_freq = 1000000 / (frac_freq + 1);
> +
> +	return base_freq + frac_freq;
> +}
> +
> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) <= 4) {
> +		/* PRMs say:
> +		 *
> +		 *     "The value in this register increments once every 16
> +		 *      hclks." ("CLKCFG" register)
> +		 *
> +		 * Since dev_priv->rawclk_freq stores the value in kHz divided
> +		 * by 4, we just need to divide it again by 4.
> +		 */
> +		return (dev_priv->rawclk_freq * 1000) / 4;
> +	} else if (INTEL_GEN(dev_priv) <= 7) {
> +		/* PRMs say:
> +		 *
> +		 *     "The PCU TSC counts 10ns increments; this timestamp
> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
> +		 *      rolling over every 1.5 hours).
> +		 */
> +		return 12500000;
> +	} else if (INTEL_GEN(dev_priv) <= 9) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		else
> +			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	} else if (INTEL_GEN(dev_priv) <= 10) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +		u32 rpm_config_reg = 0;
> +
> +		/* First figure out the reference frequency. There are 2 ways
> +		 * we can compute the frequency, either through the
> +		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
> +		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
> +		 * one we should use.
> +		 */
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		} else {
> +			u32 crystal_clock;
> +
> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
> +			crystal_clock = (rpm_config_reg &
> +					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
> +			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
> +				19200000 : 24000000;
> +		}
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((rpm_config_reg &
> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	}
> +
> +	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
> +	return 0;
> +}
> +
>  /*
>   * Determine various intel_device_info fields at runtime.
>   *
> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>  	else if (INTEL_GEN(dev_priv) >= 10)
>  		gen10_sseu_info_init(dev_priv);
>  
> +	/* Initialize command stream timestamp frequency */
> +	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
> +
>  	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>  	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>  	DRM_DEBUG_DRIVER("subslice total: %u\n",
> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>  			 info->sseu.has_subslice_pg ? "y" : "n");
>  	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>  			 info->sseu.has_eu_pg ? "y" : "n");
> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
> +			 info->cs_timestamp_frequency);
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 125bde7d9504..c3ff0d4947af 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>  
> +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
> + * registers. This used to be fixed per platform but from CNL onwards, this
> + * might vary depending on the parts.
> + */
> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
> +
>  typedef struct drm_i915_getparam {
>  	__s32 param;
>  	/*
> -- 
> 2.15.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Musial, Ewelina Nov. 7, 2017, 11:30 a.m. UTC | #2
On Thu, Nov 02, 2017 at 04:29:46PM +0000, Lionel Landwerlin wrote:
> We use to have this fixed per generation, but starting with CNL userspace
> cannot tell just off the PCI ID. Let's make this information available. This
> is particularly useful for performance monitoring where much of the
> normalization work is done using those timestamps (this include pipeline
> statistics in both GL & Vulkan as well as OA reports).
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>  drivers/gpu/drm/i915/i915_drv.c          |  3 +
>  drivers/gpu/drm/i915/i915_drv.h          |  2 +
>  drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>  drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h              |  6 ++
>  6 files changed, 133 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..0897fd616a1f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>  		   yesno(dev_priv->gt.awake));
>  	seq_printf(m, "Global active requests: %d\n",
>  		   dev_priv->gt.active_requests);
> +	seq_printf(m, "CS timestamp frequency: %llu\n",
> +		   dev_priv->info.cs_timestamp_frequency);
>  
>  	p = drm_seq_file_printer(m);
>  	for_each_engine(engine, dev_priv, id)
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e7e9e061073b..fdd23e79fb46 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  		if (!value)
>  			return -ENODEV;
>  		break;
> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
> +		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6cb7cd7f9420..4e804aaeaae1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -886,6 +886,8 @@ struct intel_device_info {
>  	/* Slice/subslice/EU info */
>  	struct sseu_dev_info sseu;
>  
> +	uint64_t cs_timestamp_frequency;
> +
>  	struct color_luts {
>  		u16 degamma_lut_size;
>  		u16 gamma_lut_size;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a2223f01ee2a..f392f28f2cfa 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1119,9 +1119,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>  
>  /* RPM unit config (Gen8+) */
>  #define RPM_CONFIG0	    _MMIO(0x0D00)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
> +
>  #define RPM_CONFIG1	    _MMIO(0x0D04)
>  #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>  
> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
> +#define GEN8_CTC_MODE			_MMIO(0xA26C)
> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
> +
>  /* RPC unit config (Gen8+) */
>  #define RPC_CONFIG	    _MMIO(0x0D08)
>  
> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>  #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>  #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>  
> +#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
> +
>  #define _PIPE_FRMTMSTMP_A		0x70048
>  #define PIPE_FRMTMSTMP(pipe)		\
>  			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index db03d179fc85..9b71a9b6d80e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>  	sseu->has_eu_pg = 0;
>  }
>  
> +static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
> +{
> +	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
> +	u64 base_freq, frac_freq;
> +
> +	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
> +	base_freq *= 1000000;
> +
> +	frac_freq = ((ts_override &
> +		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
> +	if (frac_freq != 0)
> +		frac_freq = 1000000 / (frac_freq + 1);
> +
> +	return base_freq + frac_freq;
> +}
> +
> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) <= 4) {
> +		/* PRMs say:
> +		 *
> +		 *     "The value in this register increments once every 16
> +		 *      hclks." ("CLKCFG" register)
> +		 *
> +		 * Since dev_priv->rawclk_freq stores the value in kHz divided
> +		 * by 4, we just need to divide it again by 4.
> +		 */
> +		return (dev_priv->rawclk_freq * 1000) / 4;
> +	} else if (INTEL_GEN(dev_priv) <= 7) {
> +		/* PRMs say:
> +		 *
> +		 *     "The PCU TSC counts 10ns increments; this timestamp
> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
> +		 *      rolling over every 1.5 hours).
> +		 */
> +		return 12500000;
> +	} else if (INTEL_GEN(dev_priv) <= 9) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		else
> +			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
What do those values mean? They look like 'magic numbers' here.
A comment or a define would be helpful.
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	} else if (INTEL_GEN(dev_priv) <= 10) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +		u32 rpm_config_reg = 0;
> +
> +		/* First figure out the reference frequency. There are 2 ways
> +		 * we can compute the frequency, either through the
> +		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
> +		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
> +		 * one we should use.
> +		 */
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		} else {
> +			u32 crystal_clock;
> +
> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
> +			crystal_clock = (rpm_config_reg &
> +					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
> +			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
> +				19200000 : 24000000;
The same here.
- Ewelina
> +		}
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((rpm_config_reg &
> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	}
> +
> +	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
> +	return 0;
> +}
> +
>  /*
>   * Determine various intel_device_info fields at runtime.
>   *
> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>  	else if (INTEL_GEN(dev_priv) >= 10)
>  		gen10_sseu_info_init(dev_priv);
>  
> +	/* Initialize command stream timestamp frequency */
> +	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
> +
>  	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>  	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>  	DRM_DEBUG_DRIVER("subslice total: %u\n",
> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>  			 info->sseu.has_subslice_pg ? "y" : "n");
>  	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>  			 info->sseu.has_eu_pg ? "y" : "n");
> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
> +			 info->cs_timestamp_frequency);
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 125bde7d9504..c3ff0d4947af 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>  
> +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
> + * registers. This used to be fixed per platform but from CNL onwards, this
> + * might vary depending on the parts.
> + */
> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
> +
>  typedef struct drm_i915_getparam {
>  	__s32 param;
>  	/*
> -- 
> 2.15.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin Nov. 7, 2017, 12:07 p.m. UTC | #3
On 07/11/17 11:30, Ewelina Musial wrote:
> On Thu, Nov 02, 2017 at 04:29:46PM +0000, Lionel Landwerlin wrote:
>> We use to have this fixed per generation, but starting with CNL userspace
>> cannot tell just off the PCI ID. Let's make this information available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>>   drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h              |  6 ++
>>   6 files changed, 133 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 39883cd915db..0897fd616a1f 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>>   		   yesno(dev_priv->gt.awake));
>>   	seq_printf(m, "Global active requests: %d\n",
>>   		   dev_priv->gt.active_requests);
>> +	seq_printf(m, "CS timestamp frequency: %llu\n",
>> +		   dev_priv->info.cs_timestamp_frequency);
>>   
>>   	p = drm_seq_file_printer(m);
>>   	for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index e7e9e061073b..fdd23e79fb46 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>   		if (!value)
>>   			return -ENODEV;
>>   		break;
>> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> +		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
>> +		break;
>>   	default:
>>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>>   		return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 6cb7cd7f9420..4e804aaeaae1 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -886,6 +886,8 @@ struct intel_device_info {
>>   	/* Slice/subslice/EU info */
>>   	struct sseu_dev_info sseu;
>>   
>> +	uint64_t cs_timestamp_frequency;
>> +
>>   	struct color_luts {
>>   		u16 degamma_lut_size;
>>   		u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index a2223f01ee2a..f392f28f2cfa 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>>   
>>   /* RPM unit config (Gen8+) */
>>   #define RPM_CONFIG0	    _MMIO(0x0D00)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   #define RPM_CONFIG1	    _MMIO(0x0D04)
>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>   
>> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>> +#define GEN8_CTC_MODE			_MMIO(0xA26C)
>> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
>> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
>> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   /* RPC unit config (Gen8+) */
>>   #define RPC_CONFIG	    _MMIO(0x0D08)
>>   
>> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>>   #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>>   #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>>   
>> +#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
>> +
>>   #define _PIPE_FRMTMSTMP_A		0x70048
>>   #define PIPE_FRMTMSTMP(pipe)		\
>>   			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..9b71a9b6d80e 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>>   	sseu->has_eu_pg = 0;
>>   }
>>   
>> +static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
>> +{
>> +	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>> +	u64 base_freq, frac_freq;
>> +
>> +	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>> +	base_freq *= 1000000;
>> +
>> +	frac_freq = ((ts_override &
>> +		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>> +	if (frac_freq != 0)
>> +		frac_freq = 1000000 / (frac_freq + 1);
>> +
>> +	return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>> +{
>> +	if (INTEL_GEN(dev_priv) <= 4) {
>> +		/* PRMs say:
>> +		 *
>> +		 *     "The value in this register increments once every 16
>> +		 *      hclks." ("CLKCFG" register)
>> +		 *
>> +		 * Since dev_priv->rawclk_freq stores the value in kHz divided
>> +		 * by 4, we just need to divide it again by 4.
>> +		 */
>> +		return (dev_priv->rawclk_freq * 1000) / 4;
>> +	} else if (INTEL_GEN(dev_priv) <= 7) {
>> +		/* PRMs say:
>> +		 *
>> +		 *     "The PCU TSC counts 10ns increments; this timestamp
>> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>> +		 *      rolling over every 1.5 hours).
>> +		 */
>> +		return 12500000;
>> +	} else if (INTEL_GEN(dev_priv) <= 9) {
>> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +		u64 freq = 0;
>> +
>> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>> +			freq = read_timestamp_frequency_from_divide(dev_priv);
>> +		else
>> +			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
> What means those values? It looks like some 'magic numbers' here.
> Some comment or define could be helpful.

Those are 19.2MHz and 24MHz (expressed in Hz).
Thanks, will add a comment or define.

>> +
>> +		/* Now figure out how the command stream's timestamp register
>> +		 * increments from this frequency (it might increment only
>> +		 * every few clock cycle).
>> +		 */
>> +		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>> +			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> +		return freq;
>> +	} else if (INTEL_GEN(dev_priv) <= 10) {
>> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +		u64 freq = 0;
>> +		u32 rpm_config_reg = 0;
>> +
>> +		/* First figure out the reference frequency. There are 2 ways
>> +		 * we can compute the frequency, either through the
>> +		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
>> +		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>> +		 * one we should use.
>> +		 */
>> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>> +			freq = read_timestamp_frequency_from_divide(dev_priv);
>> +		} else {
>> +			u32 crystal_clock;
>> +
>> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
>> +			crystal_clock = (rpm_config_reg &
>> +					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>> +			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>> +				19200000 : 24000000;
> The same here.
> - Ewelina
>> +		}
>> +
>> +		/* Now figure out how the command stream's timestamp register
>> +		 * increments from this frequency (it might increment only
>> +		 * every few clock cycle).
>> +		 */
>> +		freq >>= 3 - ((rpm_config_reg &
>> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> +		return freq;
>> +	}
>> +
>> +	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
>> +	return 0;
>> +}
>> +
>>   /*
>>    * Determine various intel_device_info fields at runtime.
>>    *
>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>>   	else if (INTEL_GEN(dev_priv) >= 10)
>>   		gen10_sseu_info_init(dev_priv);
>>   
>> +	/* Initialize command stream timestamp frequency */
>> +	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>> +
>>   	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>>   			 info->sseu.has_subslice_pg ? "y" : "n");
>>   	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>   			 info->sseu.has_eu_pg ? "y" : "n");
>> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> +			 info->cs_timestamp_frequency);
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 125bde7d9504..c3ff0d4947af 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>>   
>> +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
>> +
>>   typedef struct drm_i915_getparam {
>>   	__s32 param;
>>   	/*
>> -- 
>> 2.15.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin Nov. 8, 2017, 5:36 p.m. UTC | #4
Is there anyone with spare time to review this patch?
It's kind of required for userspace to make sense of timestamps on CNL.

Thanks a lot,

-
Lionel

On 02/11/17 16:29, Lionel Landwerlin wrote:
> We use to have this fixed per generation, but starting with CNL userspace
> cannot tell just off the PCI ID. Let's make this information available. This
> is particularly useful for performance monitoring where much of the
> normalization work is done using those timestamps (this include pipeline
> statistics in both GL & Vulkan as well as OA reports).
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>   drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h              |  6 ++
>   6 files changed, 133 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..0897fd616a1f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>   		   yesno(dev_priv->gt.awake));
>   	seq_printf(m, "Global active requests: %d\n",
>   		   dev_priv->gt.active_requests);
> +	seq_printf(m, "CS timestamp frequency: %llu\n",
> +		   dev_priv->info.cs_timestamp_frequency);
>   
>   	p = drm_seq_file_printer(m);
>   	for_each_engine(engine, dev_priv, id)
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e7e9e061073b..fdd23e79fb46 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   		if (!value)
>   			return -ENODEV;
>   		break;
> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
> +		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6cb7cd7f9420..4e804aaeaae1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -886,6 +886,8 @@ struct intel_device_info {
>   	/* Slice/subslice/EU info */
>   	struct sseu_dev_info sseu;
>   
> +	uint64_t cs_timestamp_frequency;
> +
>   	struct color_luts {
>   		u16 degamma_lut_size;
>   		u16 gamma_lut_size;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a2223f01ee2a..f392f28f2cfa 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1119,9 +1119,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   
>   /* RPM unit config (Gen8+) */
>   #define RPM_CONFIG0	    _MMIO(0x0D00)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
> +
>   #define RPM_CONFIG1	    _MMIO(0x0D04)
>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>   
> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
> +#define GEN8_CTC_MODE			_MMIO(0xA26C)
> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
> +
>   /* RPC unit config (Gen8+) */
>   #define RPC_CONFIG	    _MMIO(0x0D08)
>   
> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>   #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>   #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>   
> +#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
> +
>   #define _PIPE_FRMTMSTMP_A		0x70048
>   #define PIPE_FRMTMSTMP(pipe)		\
>   			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index db03d179fc85..9b71a9b6d80e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = 0;
>   }
>   
> +static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
> +{
> +	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
> +	u64 base_freq, frac_freq;
> +
> +	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
> +	base_freq *= 1000000;
> +
> +	frac_freq = ((ts_override &
> +		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
> +	if (frac_freq != 0)
> +		frac_freq = 1000000 / (frac_freq + 1);
> +
> +	return base_freq + frac_freq;
> +}
> +
> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) <= 4) {
> +		/* PRMs say:
> +		 *
> +		 *     "The value in this register increments once every 16
> +		 *      hclks." ("CLKCFG" register)
> +		 *
> +		 * Since dev_priv->rawclk_freq stores the value in kHz divided
> +		 * by 4, we just need to divide it again by 4.
> +		 */
> +		return (dev_priv->rawclk_freq * 1000) / 4;
> +	} else if (INTEL_GEN(dev_priv) <= 7) {
> +		/* PRMs say:
> +		 *
> +		 *     "The PCU TSC counts 10ns increments; this timestamp
> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
> +		 *      rolling over every 1.5 hours).
> +		 */
> +		return 12500000;
> +	} else if (INTEL_GEN(dev_priv) <= 9) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		else
> +			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	} else if (INTEL_GEN(dev_priv) <= 10) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +		u32 rpm_config_reg = 0;
> +
> +		/* First figure out the reference frequency. There are 2 ways
> +		 * we can compute the frequency, either through the
> +		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
> +		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
> +		 * one we should use.
> +		 */
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		} else {
> +			u32 crystal_clock;
> +
> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
> +			crystal_clock = (rpm_config_reg &
> +					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
> +			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
> +				19200000 : 24000000;
> +		}
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((rpm_config_reg &
> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	}
> +
> +	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
> +	return 0;
> +}
> +
>   /*
>    * Determine various intel_device_info fields at runtime.
>    *
> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	else if (INTEL_GEN(dev_priv) >= 10)
>   		gen10_sseu_info_init(dev_priv);
>   
> +	/* Initialize command stream timestamp frequency */
> +	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
> +
>   	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   			 info->sseu.has_subslice_pg ? "y" : "n");
>   	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>   			 info->sseu.has_eu_pg ? "y" : "n");
> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
> +			 info->cs_timestamp_frequency);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 125bde7d9504..c3ff0d4947af 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>   
> +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
> + * registers. This used to be fixed per platform but from CNL onwards, this
> + * might vary depending on the parts.
> + */
> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
> +
>   typedef struct drm_i915_getparam {
>   	__s32 param;
>   	/*
sagar.a.kamble@intel.com Nov. 9, 2017, 9:10 a.m. UTC | #5
On 11/8/2017 11:06 PM, Lionel Landwerlin wrote:
> Is there anyone with spare time to review this patch?
I'm on it.
> It's kind of required for userspace to make sense of timestamps on CNL.
>
> Thanks a lot,
>
> -
> Lionel
>
> On 02/11/17 16:29, Lionel Landwerlin wrote:
>> We use to have this fixed per generation, but starting with CNL 
>> userspace
>> cannot tell just off the PCI ID. Let's make this information 
>> available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>>   drivers/gpu/drm/i915/intel_device_info.c | 99 
>> ++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h              |  6 ++
>>   6 files changed, 133 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 39883cd915db..0897fd616a1f 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, 
>> void *unused)
>>              yesno(dev_priv->gt.awake));
>>       seq_printf(m, "Global active requests: %d\n",
>>              dev_priv->gt.active_requests);
>> +    seq_printf(m, "CS timestamp frequency: %llu\n",
>> +           dev_priv->info.cs_timestamp_frequency);
>>         p = drm_seq_file_printer(m);
>>       for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index e7e9e061073b..fdd23e79fb46 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>           if (!value)
>>               return -ENODEV;
>>           break;
>> +    case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> +        value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
>> +        break;
>>       default:
>>           DRM_DEBUG("Unknown parameter %d\n", param->param);
>>           return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 6cb7cd7f9420..4e804aaeaae1 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -886,6 +886,8 @@ struct intel_device_info {
>>       /* Slice/subslice/EU info */
>>       struct sseu_dev_info sseu;
>>   +    uint64_t cs_timestamp_frequency;
>> +
>>       struct color_luts {
>>           u16 degamma_lut_size;
>>           u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index a2223f01ee2a..f392f28f2cfa 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool 
>> i915_mmio_reg_valid(i915_reg_t reg)
>>     /* RPM unit config (Gen8+) */
>>   #define RPM_CONFIG0        _MMIO(0x0D00)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT    3
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK    (1 << 
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ    0
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ    1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT    1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   #define RPM_CONFIG1        _MMIO(0x0D04)
>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>   +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>> +#define GEN8_CTC_MODE            _MMIO(0xA26C)
>> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
>> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK    0
>> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC    1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT    1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>> GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   /* RPC unit config (Gen8+) */
>>   #define RPC_CONFIG        _MMIO(0x0D08)
>>   @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>>   #define ILK_TIMESTAMP_HI    _MMIO(0x70070)
>>   #define IVB_TIMESTAMP_CTR    _MMIO(0x44070)
>>   +#define GEN8_TIMESTAMP_OVERRIDE                _MMIO(0x44074)
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT        0
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK        0x3ff
>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT    12
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 
>> 12)
>> +
>>   #define _PIPE_FRMTMSTMP_A        0x70048
>>   #define PIPE_FRMTMSTMP(pipe)        \
>>               _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..9b71a9b6d80e 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>       sseu->has_eu_pg = 0;
>>   }
>>   +static u64 read_timestamp_frequency_from_divide(struct 
>> drm_i915_private *dev_priv)
>> +{
>> +    u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>> +    u64 base_freq, frac_freq;
>> +
>> +    base_freq = ((ts_override & 
>> GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>> +             GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>> +    base_freq *= 1000000;
>> +
>> +    frac_freq = ((ts_override &
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>> +    if (frac_freq != 0)
>> +        frac_freq = 1000000 / (frac_freq + 1);
>> +
>> +    return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>> +{
>> +    if (INTEL_GEN(dev_priv) <= 4) {
>> +        /* PRMs say:
>> +         *
>> +         *     "The value in this register increments once every 16
>> +         *      hclks." ("CLKCFG" register)
>> +         *
>> +         * Since dev_priv->rawclk_freq stores the value in kHz divided
>> +         * by 4, we just need to divide it again by 4.
>> +         */
>> +        return (dev_priv->rawclk_freq * 1000) / 4;
>> +    } else if (INTEL_GEN(dev_priv) <= 7) {
>> +        /* PRMs say:
>> +         *
>> +         *     "The PCU TSC counts 10ns increments; this timestamp
>> +         *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>> +         *      rolling over every 1.5 hours).
>> +         */
>> +        return 12500000;
>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +        u64 freq = 0;
>> +
>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>> +        else
>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>> +
>> +        /* Now figure out how the command stream's timestamp register
>> +         * increments from this frequency (it might increment only
>> +         * every few clock cycle).
>> +         */
>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> +        return freq;
>> +    } else if (INTEL_GEN(dev_priv) <= 10) {
>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +        u64 freq = 0;
>> +        u32 rpm_config_reg = 0;
>> +
>> +        /* First figure out the reference frequency. There are 2 ways
>> +         * we can compute the frequency, either through the
>> +         * TIMESTAMP_OVERRIDE register or through CTC_MODE &
>> +         * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>> +         * one we should use.
>> +         */
>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>> +        } else {
>> +            u32 crystal_clock;
>> +
>> +            rpm_config_reg = I915_READ(RPM_CONFIG0);
>> +            crystal_clock = (rpm_config_reg &
>> +                     GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>> +                GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>> +            freq = crystal_clock == 
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>> +                19200000 : 24000000;
>> +        }
>> +
>> +        /* Now figure out how the command stream's timestamp register
>> +         * increments from this frequency (it might increment only
>> +         * every few clock cycle).
>> +         */
>> +        freq >>= 3 - ((rpm_config_reg &
>> +                   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>> +                  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> +        return freq;
>> +    }
>> +
>> +    DRM_ERROR("Unknown gen, unable to compute command stream 
>> timestamp frequency\n");
>> +    return 0;
>> +}
>> +
>>   /*
>>    * Determine various intel_device_info fields at runtime.
>>    *
>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>       else if (INTEL_GEN(dev_priv) >= 10)
>>           gen10_sseu_info_init(dev_priv);
>>   +    /* Initialize command stream timestamp frequency */
>> +    info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>> +
>>       DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>> hweight8(info->sseu.slice_mask));
>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>                info->sseu.has_subslice_pg ? "y" : "n");
>>       DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>                info->sseu.has_eu_pg ? "y" : "n");
>> +    DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> +             info->cs_timestamp_frequency);
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 125bde7d9504..c3ff0d4947af 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>>   +/* Frequency of the command streamer timestamps given by the 
>> *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL 
>> onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
>> +
>>   typedef struct drm_i915_getparam {
>>       __s32 param;
>>       /*
>
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
sagar.a.kamble@intel.com Nov. 9, 2017, 11:58 a.m. UTC | #6
On 11/2/2017 9:59 PM, Lionel Landwerlin wrote:
> We use to have this fixed per generation, but starting with CNL userspace
> cannot tell just off the PCI ID. Let's make this information available. This
> is particularly useful for performance monitoring where much of the
> normalization work is done using those timestamps (this include pipeline
> statistics in both GL & Vulkan as well as OA reports).
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>   drivers/gpu/drm/i915/intel_device_info.c | 99 ++++++++++++++++++++++++++++++++
>   include/uapi/drm/i915_drm.h              |  6 ++
>   6 files changed, 133 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 39883cd915db..0897fd616a1f 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>   		   yesno(dev_priv->gt.awake));
>   	seq_printf(m, "Global active requests: %d\n",
>   		   dev_priv->gt.active_requests);
> +	seq_printf(m, "CS timestamp frequency: %llu\n",
> +		   dev_priv->info.cs_timestamp_frequency);
This should be accessed through INTEL_INFO().
How about adding "Hz" to the message?
>   
>   	p = drm_seq_file_printer(m);
>   	for_each_engine(engine, dev_priv, id)
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index e7e9e061073b..fdd23e79fb46 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   		if (!value)
>   			return -ENODEV;
>   		break;
> +	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
> +		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
We are losing precision here. Can we make cs_timestamp_frequency a u32?
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 6cb7cd7f9420..4e804aaeaae1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -886,6 +886,8 @@ struct intel_device_info {
>   	/* Slice/subslice/EU info */
>   	struct sseu_dev_info sseu;
>   
> +	uint64_t cs_timestamp_frequency;
> +
s/uint64_t/u64 - (Chris had suggested earlier)
>   	struct color_luts {
>   		u16 degamma_lut_size;
>   		u16 gamma_lut_size;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a2223f01ee2a..f392f28f2cfa 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1119,9 +1119,24 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   
>   /* RPM unit config (Gen8+) */
>   #define RPM_CONFIG0	    _MMIO(0x0D00)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
> +
>   #define RPM_CONFIG1	    _MMIO(0x0D04)
>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>   
> +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
> +#define GEN8_CTC_MODE			_MMIO(0xA26C)
> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
> +
>   /* RPC unit config (Gen8+) */
>   #define RPC_CONFIG	    _MMIO(0x0D08)
>   
> @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>   #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
>   #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
>   
> +#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
US_COUNTER_DIVIDER_MASK?
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
> +
>   #define _PIPE_FRMTMSTMP_A		0x70048
>   #define PIPE_FRMTMSTMP(pipe)		\
>   			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index db03d179fc85..9b71a9b6d80e 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = 0;
>   }
>   
> +static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
Should this be named read_reference_ts_freq?
> +{
> +	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
> +	u64 base_freq, frac_freq;
> +
> +	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
> +	base_freq *= 1000000;
> +
> +	frac_freq = ((ts_override &
> +		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
> +		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
> +	if (frac_freq != 0)
> +		frac_freq = 1000000 / (frac_freq + 1);
Not considering numerator?
> +
> +	return base_freq + frac_freq;
> +}
> +
> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
> +{
> +	if (INTEL_GEN(dev_priv) <= 4) {
> +		/* PRMs say:
> +		 *
> +		 *     "The value in this register increments once every 16
> +		 *      hclks." ("CLKCFG" register)
> +		 *
> +		 * Since dev_priv->rawclk_freq stores the value in kHz divided
> +		 * by 4, we just need to divide it again by 4.
> +		 */
I read this as: hclk is 1/4 of the FSB clock and the timestamp increments
once every 16 hclks, so the divisor here should be 16.
> +		return (dev_priv->rawclk_freq * 1000) / 4;
> +	} else if (INTEL_GEN(dev_priv) <= 7) {
> +		/* PRMs say:
> +		 *
> +		 *     "The PCU TSC counts 10ns increments; this timestamp
> +		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
> +		 *      rolling over every 1.5 hours).
> +		 */
> +		return 12500000;
> +	} else if (INTEL_GEN(dev_priv) <= 9) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		else
> +			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
Gen8 documentation is indeed fuzzy. Are we getting 12.5 MHz after this
shift? The doc says the timestamp has an 80ns base.
> +
> +		return freq;
> +	} else if (INTEL_GEN(dev_priv) <= 10) {
> +		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
> +		u64 freq = 0;
> +		u32 rpm_config_reg = 0;
> +
> +		/* First figure out the reference frequency. There are 2 ways
> +		 * we can compute the frequency, either through the
> +		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
Remove CTC_MODE as it does not itself determine the frequency.
> +		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
> +		 * one we should use.
> +		 */
> +		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
> +			freq = read_timestamp_frequency_from_divide(dev_priv);
> +		} else {
> +			u32 crystal_clock;
> +
> +			rpm_config_reg = I915_READ(RPM_CONFIG0);
> +			crystal_clock = (rpm_config_reg &
> +					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
> +				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
> +			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
> +				19200000 : 24000000;
A switch statement would be better, I guess.
> +		}
> +
> +		/* Now figure out how the command stream's timestamp register
> +		 * increments from this frequency (it might increment only
> +		 * every few clock cycle).
> +		 */
> +		freq >>= 3 - ((rpm_config_reg &
> +			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
> +			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
> +
> +		return freq;
> +	}
> +
> +	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
> +	return 0;
> +}
> +
>   /*
>    * Determine various intel_device_info fields at runtime.
>    *
> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	else if (INTEL_GEN(dev_priv) >= 10)
>   		gen10_sseu_info_init(dev_priv);
>   
> +	/* Initialize command stream timestamp frequency */
> +	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
> +
>   	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   			 info->sseu.has_subslice_pg ? "y" : "n");
>   	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>   			 info->sseu.has_eu_pg ? "y" : "n");
> +	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
> +			 info->cs_timestamp_frequency);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 125bde7d9504..c3ff0d4947af 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>   
> +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
> + * registers. This used to be fixed per platform but from CNL onwards, this
> + * might vary depending on the parts.
> + */
> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
> +
>   typedef struct drm_i915_getparam {
>   	__s32 param;
>   	/*
Lionel Landwerlin Nov. 9, 2017, 2:06 p.m. UTC | #7
On 09/11/17 11:58, Sagar Arun Kamble wrote:
>
>
> On 11/2/2017 9:59 PM, Lionel Landwerlin wrote:
>> We use to have this fixed per generation, but starting with CNL 
>> userspace
>> cannot tell just off the PCI ID. Let's make this information 
>> available. This
>> is particularly useful for performance monitoring where much of the
>> normalization work is done using those timestamps (this include pipeline
>> statistics in both GL & Vulkan as well as OA reports).
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>>   drivers/gpu/drm/i915/intel_device_info.c | 99 
>> ++++++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h              |  6 ++
>>   6 files changed, 133 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 39883cd915db..0897fd616a1f 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file *m, 
>> void *unused)
>>              yesno(dev_priv->gt.awake));
>>       seq_printf(m, "Global active requests: %d\n",
>>              dev_priv->gt.active_requests);
>> +    seq_printf(m, "CS timestamp frequency: %llu\n",
>> +           dev_priv->info.cs_timestamp_frequency);
> should be accessed through INTEL_INFO
> How about adding "Hz" to message

Done.

>>         p = drm_seq_file_printer(m);
>>       for_each_engine(engine, dev_priv, id)
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index e7e9e061073b..fdd23e79fb46 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>           if (!value)
>>               return -ENODEV;
>>           break;
>> +    case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>> +        value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
> losing the precision here. can we make cs_timestamp_frequency u32?

Yeah, I'm not super happy about the int* of getparam.
INT_MAX limits us to ~2GHz, which I don't think we'll ever reach.
Do you agree? Do you think we need to handle bigger values?


>> +        break;
>>       default:
>>           DRM_DEBUG("Unknown parameter %d\n", param->param);
>>           return -EINVAL;
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 6cb7cd7f9420..4e804aaeaae1 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -886,6 +886,8 @@ struct intel_device_info {
>>       /* Slice/subslice/EU info */
>>       struct sseu_dev_info sseu;
>>   +    uint64_t cs_timestamp_frequency;
>> +
> s/uint64_t/u64 - (Chris had suggested earlier)

Done.

>>       struct color_luts {
>>           u16 degamma_lut_size;
>>           u16 gamma_lut_size;
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index a2223f01ee2a..f392f28f2cfa 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1119,9 +1119,24 @@ static inline bool 
>> i915_mmio_reg_valid(i915_reg_t reg)
>>     /* RPM unit config (Gen8+) */
>>   #define RPM_CONFIG0        _MMIO(0x0D00)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT    3
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK    (1 << 
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ    0
>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ    1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT    1
>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   #define RPM_CONFIG1        _MMIO(0x0D04)
>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>   +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>> +#define GEN8_CTC_MODE            _MMIO(0xA26C)
>> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
>> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK    0
>> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC    1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT    1
>> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>> GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>> +
>>   /* RPC unit config (Gen8+) */
>>   #define RPC_CONFIG        _MMIO(0x0D08)
>>   @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>>   #define ILK_TIMESTAMP_HI    _MMIO(0x70070)
>>   #define IVB_TIMESTAMP_CTR    _MMIO(0x44070)
>>   +#define GEN8_TIMESTAMP_OVERRIDE                _MMIO(0x44074)
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT        0
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK        0x3ff
> US_COUNTER_DIVIDER_MASK?

Sure, I thought it was just a bit too long :)

>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT    12
>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf << 
>> 12)
>> +
>>   #define _PIPE_FRMTMSTMP_A        0x70048
>>   #define PIPE_FRMTMSTMP(pipe)        \
>>               _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index db03d179fc85..9b71a9b6d80e 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>       sseu->has_eu_pg = 0;
>>   }
>>   +static u64 read_timestamp_frequency_from_divide(struct 
>> drm_i915_private *dev_priv)
> Should this be named read_reference_ts_freq?

Yes, thanks!

>> +{
>> +    u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>> +    u64 base_freq, frac_freq;
>> +
>> +    base_freq = ((ts_override & 
>> GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>> +             GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>> +    base_freq *= 1000000;
>> +
>> +    frac_freq = ((ts_override &
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>> +    if (frac_freq != 0)
>> +        frac_freq = 1000000 / (frac_freq + 1);
> Not considering numerator?

The documentation is quite terrible, but my reading is that the 
numerator doesn't apply to any current generations.

>> +
>> +    return base_freq + frac_freq;
>> +}
>> +
>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>> +{
>> +    if (INTEL_GEN(dev_priv) <= 4) {
>> +        /* PRMs say:
>> +         *
>> +         *     "The value in this register increments once every 16
>> +         *      hclks." ("CLKCFG" register)
>> +         *
>> +         * Since dev_priv->rawclk_freq stores the value in kHz divided
>> +         * by 4, we just need to divide it again by 4.
>> +         */
> I read this as hclk is 1/4th fsb clock and timestamp is 1/16 of hclk 
> so this should be 16.

You're right, but as the comment above explains, rawclk_freq is already 
hclk / 4.
Another / 4 gives us / 16.

>> +        return (dev_priv->rawclk_freq * 1000) / 4;
>> +    } else if (INTEL_GEN(dev_priv) <= 7) {
>> +        /* PRMs say:
>> +         *
>> +         *     "The PCU TSC counts 10ns increments; this timestamp
>> +         *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>> +         *      rolling over every 1.5 hours).
>> +         */
>> +        return 12500000;
>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +        u64 freq = 0;
>> +
>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>> +        else
>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>> +
>> +        /* Now figure out how the command stream's timestamp register
>> +         * increments from this frequency (it might increment only
>> +         * every few clock cycle).
>> +         */
>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> Gen8 documentation is indeed fuzzy. Are we getting 12.5mhz after this 
> shift as doc says it to have 80ns base.
>> +
>> +        return freq;
>> +    } else if (INTEL_GEN(dev_priv) <= 10) {
>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +        u64 freq = 0;
>> +        u32 rpm_config_reg = 0;
>> +
>> +        /* First figure out the reference frequency. There are 2 ways
>> +         * we can compute the frequency, either through the
>> +         * TIMESTAMP_OVERRIDE register or through CTC_MODE &
> Remove CTC_MODE as it does not itself determine the frequency.

Done, thanks.

>> +         * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>> +         * one we should use.
>> +         */
>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>> +        } else {
>> +            u32 crystal_clock;
>> +
>> +            rpm_config_reg = I915_READ(RPM_CONFIG0);
>> +            crystal_clock = (rpm_config_reg &
>> +                     GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>> +                GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>> +            freq = crystal_clock == 
>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>> +                19200000 : 24000000;
> switch case would be better i guess.

Done.

>> +        }
>> +
>> +        /* Now figure out how the command stream's timestamp register
>> +         * increments from this frequency (it might increment only
>> +         * every few clock cycle).
>> +         */
>> +        freq >>= 3 - ((rpm_config_reg &
>> +                   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>> +                  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>> +
>> +        return freq;
>> +    }
>> +
>> +    DRM_ERROR("Unknown gen, unable to compute command stream 
>> timestamp frequency\n");
>> +    return 0;
>> +}
>> +
>>   /*
>>    * Determine various intel_device_info fields at runtime.
>>    *
>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>       else if (INTEL_GEN(dev_priv) >= 10)
>>           gen10_sseu_info_init(dev_priv);
>>   +    /* Initialize command stream timestamp frequency */
>> +    info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>> +
>>       DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>> hweight8(info->sseu.slice_mask));
>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct 
>> drm_i915_private *dev_priv)
>>                info->sseu.has_subslice_pg ? "y" : "n");
>>       DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>                info->sseu.has_eu_pg ? "y" : "n");
>> +    DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>> +             info->cs_timestamp_frequency);
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 125bde7d9504..c3ff0d4947af 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>>    */
>>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>>   +/* Frequency of the command streamer timestamps given by the 
>> *_TIMESTAMP
>> + * registers. This used to be fixed per platform but from CNL 
>> onwards, this
>> + * might vary depending on the parts.
>> + */
>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
>> +
>>   typedef struct drm_i915_getparam {
>>       __s32 param;
>>       /*
>
>
Lionel Landwerlin Nov. 9, 2017, 2:13 p.m. UTC | #8
On 09/11/17 14:06, Lionel Landwerlin wrote:
>>
>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>> +        u64 freq = 0;
>> +
>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>> +        else
>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>> +
>> +        /* Now figure out how the command stream's timestamp register
>> +         * increments from this frequency (it might increment only
>> +         * every few clock cycle).
>> +         */
>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
> Gen8 documentation is indeed fuzzy. Are we getting 12.5mhz after this 
> shift as doc says it to have 80ns base. 
Forgot to answer that point. Let me check this on BDW again.
But yes, the idea is that we should get 12.5MHz on BDW.
sagar.a.kamble@intel.com Nov. 9, 2017, 4:37 p.m. UTC | #9
On 11/9/2017 7:36 PM, Lionel Landwerlin wrote:
> On 09/11/17 11:58, Sagar Arun Kamble wrote:
>>
>>
>> On 11/2/2017 9:59 PM, Lionel Landwerlin wrote:
>>> We use to have this fixed per generation, but starting with CNL 
>>> userspace
>>> cannot tell just off the PCI ID. Let's make this information 
>>> available. This
>>> is particularly useful for performance monitoring where much of the
>>> normalization work is done using those timestamps (this include 
>>> pipeline
>>> statistics in both GL & Vulkan as well as OA reports).
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c      |  2 +
>>>   drivers/gpu/drm/i915/i915_drv.c          |  3 +
>>>   drivers/gpu/drm/i915/i915_drv.h          |  2 +
>>>   drivers/gpu/drm/i915/i915_reg.h          | 21 +++++++
>>>   drivers/gpu/drm/i915/intel_device_info.c | 99 
>>> ++++++++++++++++++++++++++++++++
>>>   include/uapi/drm/i915_drm.h              |  6 ++
>>>   6 files changed, 133 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 39883cd915db..0897fd616a1f 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -3246,6 +3246,8 @@ static int i915_engine_info(struct seq_file 
>>> *m, void *unused)
>>>              yesno(dev_priv->gt.awake));
>>>       seq_printf(m, "Global active requests: %d\n",
>>>              dev_priv->gt.active_requests);
>>> +    seq_printf(m, "CS timestamp frequency: %llu\n",
>>> +           dev_priv->info.cs_timestamp_frequency);
>> should be accessed through INTEL_INFO
>> How about adding "Hz" to message
>
> Done.
>
>>>         p = drm_seq_file_printer(m);
>>>       for_each_engine(engine, dev_priv, id)
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>>> b/drivers/gpu/drm/i915/i915_drv.c
>>> index e7e9e061073b..fdd23e79fb46 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -416,6 +416,9 @@ static int i915_getparam(struct drm_device *dev, 
>>> void *data,
>>>           if (!value)
>>>               return -ENODEV;
>>>           break;
>>> +    case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
>>> +        value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
>> losing the precision here. can we make cs_timestamp_frequency u32?
>
> Yeah, I'm not super happy about the int* of getparam.
> INT_MAX limits us to ~2GHz, which I don't think we'll ever reach.
> Do you agree? Do you think we need to handle bigger values?
>
Yes. Agree on making this int.
>
>>> +        break;
>>>       default:
>>>           DRM_DEBUG("Unknown parameter %d\n", param->param);
>>>           return -EINVAL;
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 6cb7cd7f9420..4e804aaeaae1 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -886,6 +886,8 @@ struct intel_device_info {
>>>       /* Slice/subslice/EU info */
>>>       struct sseu_dev_info sseu;
>>>   +    uint64_t cs_timestamp_frequency;
>>> +
>> s/uint64_t/u64 - (Chris had suggested earlier)
>
> Done.
>
>>>       struct color_luts {
>>>           u16 degamma_lut_size;
>>>           u16 gamma_lut_size;
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index a2223f01ee2a..f392f28f2cfa 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -1119,9 +1119,24 @@ static inline bool 
>>> i915_mmio_reg_valid(i915_reg_t reg)
>>>     /* RPM unit config (Gen8+) */
>>>   #define RPM_CONFIG0        _MMIO(0x0D00)
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT    3
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK    (1 << 
>>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ    0
>>> +#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ    1
>>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT    1
>>> +#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>>> GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
>>> +
>>>   #define RPM_CONFIG1        _MMIO(0x0D04)
>>>   #define  GEN10_GT_NOA_ENABLE  (1 << 9)
>>>   +/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
>>> +#define GEN8_CTC_MODE            _MMIO(0xA26C)
>>> +#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
>>> +#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK    0
>>> +#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC    1
>>> +#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT    1
>>> +#define  GEN8_CTC_SHIFT_PARAMETER_MASK    (0x3 << 
>>> GEN8_CTC_SHIFT_PARAMETER_SHIFT)
>>> +
>>>   /* RPC unit config (Gen8+) */
>>>   #define RPC_CONFIG        _MMIO(0x0D08)
>>>   @@ -8865,6 +8880,12 @@ enum skl_power_gate {
>>>   #define ILK_TIMESTAMP_HI    _MMIO(0x70070)
>>>   #define IVB_TIMESTAMP_CTR    _MMIO(0x44070)
>>>   +#define GEN8_TIMESTAMP_OVERRIDE _MMIO(0x44074)
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT        0
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK        0x3ff
>> US_COUNTER_DIVIDER_MASK?
>
> Sure, I thought it was just a bit too long :)
>
>>> +#define GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT    12
>>> +#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK (0xf 
>>> << 12)
>>> +
>>>   #define _PIPE_FRMTMSTMP_A        0x70048
>>>   #define PIPE_FRMTMSTMP(pipe)        \
>>>               _MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>>> b/drivers/gpu/drm/i915/intel_device_info.c
>>> index db03d179fc85..9b71a9b6d80e 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>> @@ -329,6 +329,100 @@ static void broadwell_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>       sseu->has_eu_pg = 0;
>>>   }
>>>   +static u64 read_timestamp_frequency_from_divide(struct 
>>> drm_i915_private *dev_priv)
>> Should this be named read_reference_ts_freq?
>
> Yes, thanks!
>
>>> +{
>>> +    u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
>>> +    u64 base_freq, frac_freq;
>>> +
>>> +    base_freq = ((ts_override & 
>>> GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
>>> +             GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
>>> +    base_freq *= 1000000;
>>> +
>>> +    frac_freq = ((ts_override &
>>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
>>> + GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
>>> +    if (frac_freq != 0)
>>> +        frac_freq = 1000000 / (frac_freq + 1);
>> Not considering numerator?
>
> The documentation is quite terrible, but my reading is that the 
> numerator doesn't apply to any current generations.
>
Understood now. I think we should also check whether the override is
set before taking the denominator into account.
>>> +
>>> +    return base_freq + frac_freq;
>>> +}
>>> +
>>> +static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
>>> +{
>>> +    if (INTEL_GEN(dev_priv) <= 4) {
>>> +        /* PRMs say:
>>> +         *
>>> +         *     "The value in this register increments once every 16
>>> +         *      hclks." ("CLKCFG" register)
>>> +         *
>>> +         * Since dev_priv->rawclk_freq stores the value in kHz divided
>>> +         * by 4, we just need to divide it again by 4.
>>> +         */
>> I read this as hclk is 1/4th fsb clock and timestamp is 1/16 of hclk 
>> so this should be 16.
>
> You're right, but as the comment above explains, rawclk_freq is 
> already hclk / 4.
> Another / 4 gives us / 16.
1. hclk=1/4* fsb_clk
2. ts_clk=1/16*hclk
=> ts_clk=1/64*fsb_clk
So this should be "(dev_priv->rawclk_freq * 1000) / 16" right?
>
>>> +        return (dev_priv->rawclk_freq * 1000) / 4;
>>> +    } else if (INTEL_GEN(dev_priv) <= 7) {
>>> +        /* PRMs say:
>>> +         *
>>> +         *     "The PCU TSC counts 10ns increments; this timestamp
>>> +         *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
>>> +         *      rolling over every 1.5 hours).
>>> +         */
>>> +        return 12500000;
>>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>>> +        u64 freq = 0;
>>> +
>>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>>> +        else
>>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>>> +
>>> +        /* Now figure out how the command stream's timestamp register
>>> +         * increments from this frequency (it might increment only
>>> +         * every few clock cycle).
>>> +         */
>>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
>> Gen8 documentation is indeed fuzzy. Are we getting 12.5mhz after this 
>> shift as doc says it to have 80ns base.
>>> +
>>> +        return freq;
>>> +    } else if (INTEL_GEN(dev_priv) <= 10) {
>>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>>> +        u64 freq = 0;
>>> +        u32 rpm_config_reg = 0;
>>> +
>>> +        /* First figure out the reference frequency. There are 2 ways
>>> +         * we can compute the frequency, either through the
>>> +         * TIMESTAMP_OVERRIDE register or through CTC_MODE &
>> Remove CTC_MODE as it does not itself determine the frequency.
>
> Done, thanks.
>
>>> +         * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
>>> +         * one we should use.
>>> +         */
>>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>>> GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
>>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>>> +        } else {
>>> +            u32 crystal_clock;
>>> +
>>> +            rpm_config_reg = I915_READ(RPM_CONFIG0);
>>> +            crystal_clock = (rpm_config_reg &
>>> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
>>> +                GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
>>> +            freq = crystal_clock == 
>>> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
>>> +                19200000 : 24000000;
>> switch case would be better i guess.
>
> Done.
>
>>> +        }
>>> +
>>> +        /* Now figure out how the command stream's timestamp register
>>> +         * increments from this frequency (it might increment only
>>> +         * every few clock cycle).
>>> +         */
>>> +        freq >>= 3 - ((rpm_config_reg &
>>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
>>> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
>>> +
>>> +        return freq;
>>> +    }
>>> +
>>> +    DRM_ERROR("Unknown gen, unable to compute command stream 
>>> timestamp frequency\n");
>>> +    return 0;
>>> +}
>>> +
>>>   /*
>>>    * Determine various intel_device_info fields at runtime.
>>>    *
>>> @@ -450,6 +544,9 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>       else if (INTEL_GEN(dev_priv) >= 10)
>>>           gen10_sseu_info_init(dev_priv);
>>>   +    /* Initialize command stream timestamp frequency */
>>> +    info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
>>> +
>>>       DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
>>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>>> hweight8(info->sseu.slice_mask));
>>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>>> @@ -465,4 +562,6 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>                info->sseu.has_subslice_pg ? "y" : "n");
>>>       DRM_DEBUG_DRIVER("has EU power gating: %s\n",
>>>                info->sseu.has_eu_pg ? "y" : "n");
>>> +    DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
>>> +             info->cs_timestamp_frequency);
>>>   }
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 125bde7d9504..c3ff0d4947af 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -450,6 +450,12 @@ typedef struct drm_i915_irq_wait {
>>>    */
>>>   #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
>>>   +/* Frequency of the command streamer timestamps given by the 
>>> *_TIMESTAMP
>>> + * registers. This used to be fixed per platform but from CNL 
>>> onwards, this
>>> + * might vary depending on the parts.
>>> + */
>>> +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
>>> +
>>>   typedef struct drm_i915_getparam {
>>>       __s32 param;
>>>       /*
>>
>>
>
Thanks
Sagar
Lionel Landwerlin Nov. 9, 2017, 5:44 p.m. UTC | #10
On 09/11/17 14:13, Lionel Landwerlin wrote:
> On 09/11/17 14:06, Lionel Landwerlin wrote:
>>>
>>> +    } else if (INTEL_GEN(dev_priv) <= 9) {
>>> +        u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
>>> +        u64 freq = 0;
>>> +
>>> +        if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == 
>>> GEN8_CTC_SOURCE_DIVIDE_LOGIC)
>>> +            freq = read_timestamp_frequency_from_divide(dev_priv);
>>> +        else
>>> +            freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
>>> +
>>> +        /* Now figure out how the command stream's timestamp register
>>> +         * increments from this frequency (it might increment only
>>> +         * every few clock cycle).
>>> +         */
>>> +        freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
>>> +                  GEN8_CTC_SHIFT_PARAMETER_SHIFT);
>> Gen8 documentation is indeed fuzzy. Are we getting 12.5mhz after this 
>> shift as doc says it to have 80ns base. 
> Forgot to answer that point. Let me check this on BDW again.
> But yes, the idea is that we should get 12.5MHz on BDW.

Okay, it looks like that's wrong on my BDW system...
So this right shift should probably be applied only in the else
case (i.e. gen9).

-
Lionel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 39883cd915db..0897fd616a1f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3246,6 +3246,8 @@  static int i915_engine_info(struct seq_file *m, void *unused)
 		   yesno(dev_priv->gt.awake));
 	seq_printf(m, "Global active requests: %d\n",
 		   dev_priv->gt.active_requests);
+	seq_printf(m, "CS timestamp frequency: %llu\n",
+		   dev_priv->info.cs_timestamp_frequency);
 
 	p = drm_seq_file_printer(m);
 	for_each_engine(engine, dev_priv, id)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e7e9e061073b..fdd23e79fb46 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -416,6 +416,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 		if (!value)
 			return -ENODEV;
 		break;
+	case I915_PARAM_CS_TIMESTAMP_FREQUENCY:
+		value = INTEL_INFO(dev_priv)->cs_timestamp_frequency;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6cb7cd7f9420..4e804aaeaae1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -886,6 +886,8 @@  struct intel_device_info {
 	/* Slice/subslice/EU info */
 	struct sseu_dev_info sseu;
 
+	uint64_t cs_timestamp_frequency;
+
 	struct color_luts {
 		u16 degamma_lut_size;
 		u16 gamma_lut_size;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a2223f01ee2a..f392f28f2cfa 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1119,9 +1119,24 @@  static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0	    _MMIO(0x0D00)
+#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
+#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
+#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
+#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+
 #define RPM_CONFIG1	    _MMIO(0x0D04)
 #define  GEN10_GT_NOA_ENABLE  (1 << 9)
 
+/* GPM unit config (assuming Gen8+, documentation is fuzzy...) */
+#define GEN8_CTC_MODE			_MMIO(0xA26C)
+#define  GEN8_CTC_SOURCE_PARAMETER_MASK 1
+#define  GEN8_CTC_SOURCE_CRYSTAL_CLOCK	0
+#define  GEN8_CTC_SOURCE_DIVIDE_LOGIC	1
+#define  GEN8_CTC_SHIFT_PARAMETER_SHIFT	1
+#define  GEN8_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN8_CTC_SHIFT_PARAMETER_SHIFT)
+
 /* RPC unit config (Gen8+) */
 #define RPC_CONFIG	    _MMIO(0x0D08)
 
@@ -8865,6 +8880,12 @@  enum skl_power_gate {
 #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
 #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)
 
+#define GEN8_TIMESTAMP_OVERRIDE				_MMIO(0x44074)
+#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT		0
+#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK		0x3ff
+#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT	12
+#define  GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK	(0xf << 12)
+
 #define _PIPE_FRMTMSTMP_A		0x70048
 #define PIPE_FRMTMSTMP(pipe)		\
 			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index db03d179fc85..9b71a9b6d80e 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -329,6 +329,100 @@  static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = 0;
 }
 
+static u64 read_timestamp_frequency_from_divide(struct drm_i915_private *dev_priv)
+{
+	u32 ts_override = I915_READ(GEN8_TIMESTAMP_OVERRIDE);
+	u64 base_freq, frac_freq;
+
+	base_freq = ((ts_override & GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_MASK) >>
+		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_SHIFT) + 1;
+	base_freq *= 1000000;
+
+	frac_freq = ((ts_override &
+		      GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
+		     GEN8_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
+	if (frac_freq != 0)
+		frac_freq = 1000000 / (frac_freq + 1);
+
+	return base_freq + frac_freq;
+}
+
+static u64 read_timestamp_frequency(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) <= 4) {
+		/* PRMs say:
+		 *
+		 *     "The value in this register increments once every 16
+		 *      hclks." ("CLKCFG" register)
+		 *
+		 * Since dev_priv->rawclk_freq stores the value in kHz divided
+		 * by 4, we just need to divide it again by 4.
+		 */
+		return (dev_priv->rawclk_freq * 1000) / 4;
+	} else if (INTEL_GEN(dev_priv) <= 7) {
+		/* PRMs say:
+		 *
+		 *     "The PCU TSC counts 10ns increments; this timestamp
+		 *      reflects bits 38:3 of the TSC (i.e. 80ns granularity,
+		 *      rolling over every 1.5 hours).
+		 */
+		return 12500000;
+	} else if (INTEL_GEN(dev_priv) <= 9) {
+		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
+		u64 freq = 0;
+
+		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC)
+			freq = read_timestamp_frequency_from_divide(dev_priv);
+		else
+			freq = IS_GEN9_LP(dev_priv) ? 19200000 : 24000000;
+
+		/* Now figure out how the command stream's timestamp register
+		 * increments from this frequency (it might increment only
+		 * every few clock cycle).
+		 */
+		freq >>= 3 - ((ctc_reg & GEN8_CTC_SHIFT_PARAMETER_MASK) >>
+			      GEN8_CTC_SHIFT_PARAMETER_SHIFT);
+
+		return freq;
+	} else if (INTEL_GEN(dev_priv) <= 10) {
+		u32 ctc_reg = I915_READ(GEN8_CTC_MODE);
+		u64 freq = 0;
+		u32 rpm_config_reg = 0;
+
+		/* First figure out the reference frequency. There are 2 ways
+		 * we can compute the frequency, either through the
+		 * TIMESTAMP_OVERRIDE register or through CTC_MODE &
+		 * RPM_CONFIG & CTC_MODE registers. CTC_MODE tells us which
+		 * one we should use.
+		 */
+		if ((ctc_reg & GEN8_CTC_SOURCE_PARAMETER_MASK) == GEN8_CTC_SOURCE_DIVIDE_LOGIC) {
+			freq = read_timestamp_frequency_from_divide(dev_priv);
+		} else {
+			u32 crystal_clock;
+
+			rpm_config_reg = I915_READ(RPM_CONFIG0);
+			crystal_clock = (rpm_config_reg &
+					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+			freq = crystal_clock == GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ ?
+				19200000 : 24000000;
+		}
+
+		/* Now figure out how the command stream's timestamp register
+		 * increments from this frequency (it might increment only
+		 * every few clock cycle).
+		 */
+		freq >>= 3 - ((rpm_config_reg &
+			       GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
+			      GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
+
+		return freq;
+	}
+
+	DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n");
+	return 0;
+}
+
 /*
  * Determine various intel_device_info fields at runtime.
  *
@@ -450,6 +544,9 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	else if (INTEL_GEN(dev_priv) >= 10)
 		gen10_sseu_info_init(dev_priv);
 
+	/* Initialize command stream timestamp frequency */
+	info->cs_timestamp_frequency = read_timestamp_frequency(dev_priv);
+
 	DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask);
 	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
 	DRM_DEBUG_DRIVER("subslice total: %u\n",
@@ -465,4 +562,6 @@  void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 			 info->sseu.has_subslice_pg ? "y" : "n");
 	DRM_DEBUG_DRIVER("has EU power gating: %s\n",
 			 info->sseu.has_eu_pg ? "y" : "n");
+	DRM_DEBUG_DRIVER("CS timestamp frequency: %llu\n",
+			 info->cs_timestamp_frequency);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 125bde7d9504..c3ff0d4947af 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -450,6 +450,12 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
 
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY   50
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*