diff mbox

[RFC,1/4] drm/i915/perf: Add support to correlate GPU timestamp with system time

Message ID 1510748034-14034-2-git-send-email-sagar.a.kamble@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sagar.a.kamble@intel.com Nov. 15, 2017, 12:13 p.m. UTC
We can compute system time corresponding to GPU timestamp by taking a
reference point and then adding delta time computed using timecounter
and cyclecounter support in kernel. We have to configure cyclecounter
with the GPU timestamp frequency.
In further patches we can leverage timecounter_cyc2time function to
compute the system time corresponding to GPU timestamp cycles derived
from OA report. Important thing to note is timecounter_cyc2time considers
time backwards if delta timestamp is more than half the max ns time
covered by counter. (It will be ~35min for 36 bit counter. If this much
sampling duration is needed we will have to update tc->nsec by explicitly
reading the timecounter after duration less than 35min during sampling)

On enabling perf stream we start the timecounter/cyclecounter and while
collecting OA samples we translate GPU timestamp to System timestamp.

Earlier approach that was based on cross-timestamp is not needed. It
was being used to approximate the frequency based on invalid assumptions
(possibly drift was being seen in the time due to precision issue).
The precision of time from GPU clocks is already in ns and timecounter
takes care of it as verified over variable durations.
Cross-timestamp might be valuable to get the precise reference point
w.r.t device and system time but it needs a support for reading ART
counter from i915 which I find not available for i915. Hence, we are
compensating the offset to setup the reference point ourselves.
With this approach we have very fine precision of device and system time
only differing by 5-10us.

Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sourab Gupta <sourab.gupta@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  9 +++++++
 drivers/gpu/drm/i915/i915_perf.c | 53 ++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h  |  6 +++++
 3 files changed, 68 insertions(+)

Comments

Chris Wilson Nov. 15, 2017, 12:25 p.m. UTC | #1
Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
>  #include <drm/drmP.h>
>  #include <drm/intel-gtt.h>
> @@ -2149,6 +2150,14 @@ struct i915_perf_stream {
>          * @oa_config: The OA configuration used by the stream.
>          */
>         struct i915_oa_config *oa_config;
> +
> +       /**
> +        * System time correlation variables.
> +        */
> +       struct cyclecounter cc;
> +       spinlock_t systime_lock;
> +       struct timespec64 start_systime;
> +       struct timecounter tc;

This pattern is repeated a lot by struct timecounter users. (I'm still
trying to understand why the common case is not catered for by a
convenience timecounter api.)

>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 00be015..72ddc34 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -192,6 +192,7 @@
>   */
>  
>  #include <linux/anon_inodes.h>
> +#include <linux/clocksource.h>
>  #include <linux/sizes.h>
>  #include <linux/uuid.h>
>  
> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
>  }
>  
>  /**
> + * i915_cyclecounter_read - read raw cycle/timestamp counter
> + * @cc: cyclecounter structure
> + */
> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
> +{
> +       struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc);
> +       struct drm_i915_private *dev_priv = stream->dev_priv;
> +       u64 ts_count;
> +
> +       intel_runtime_pm_get(dev_priv);
> +       ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
> +                                   GEN7_TIMESTAMP_UDW);
> +       intel_runtime_pm_put(dev_priv);
> +
> +       return ts_count;
> +}
> +
> +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream)
> +{
> +       struct drm_i915_private *dev_priv = stream->dev_priv;
> +       int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
> +       struct cyclecounter *cc = &stream->cc;
> +       u32 maxsec;
> +
> +       cc->read = i915_cyclecounter_read;
> +       cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
> +       maxsec = cc->mask / cs_ts_freq;
> +
> +       clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
> +                              NSEC_PER_SEC, maxsec);
> +}
> +
> +static void i915_perf_init_timecounter(struct i915_perf_stream *stream)
> +{
> +#define SYSTIME_START_OFFSET   350000 /* Counter read takes about 350us */
> +       unsigned long flags;
> +       u64 ns;
> +
> +       i915_perf_init_cyclecounter(stream);
> +       spin_lock_init(&stream->systime_lock);
> +
> +       getnstimeofday64(&stream->start_systime);
> +       ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET;

Use ktime directly. Or else Arnd will be back with a patch to fix it.
(All non-ktime interfaces are effectively deprecated; obsolete for
drivers.)

> +       spin_lock_irqsave(&stream->systime_lock, flags);
> +       timecounter_init(&stream->tc, &stream->cc, ns);
> +       spin_unlock_irqrestore(&stream->systime_lock, flags);
> +}
> +
> +/**
>   * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
>   * @stream: A disabled i915 perf stream
>   *
> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
>         /* Allow stream->ops->enable() to refer to this */
>         stream->enabled = true;
>  
> +       i915_perf_init_timecounter(stream);
> +
>         if (stream->ops->enable)
>                 stream->ops->enable(stream);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index cfdf4f8..e7e6966 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -8882,6 +8882,12 @@ enum skl_power_gate {
>  
>  /* Gen4+ Timestamp and Pipe Frame time stamp registers */
>  #define GEN4_TIMESTAMP         _MMIO(0x2358)
> +#define GEN7_TIMESTAMP_UDW     _MMIO(0x235C)
> +#define PRE_GEN7_TIMESTAMP_WIDTH       32
> +#define GEN7_TIMESTAMP_WIDTH           36
> +#define CS_TIMESTAMP_WIDTH(dev_priv) \
> +       (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
> +                                  GEN7_TIMESTAMP_WIDTH)

s/PRE_GEN7/GEN4/ would be consistent.
If you really want to add support for earlier, I9XX_.

Ok. I can accept the justification, and we are not the only ones who do
the cyclecounter -> timecounter correction like this.
-Chris
sagar.a.kamble@intel.com Nov. 15, 2017, 4:41 p.m. UTC | #2
On 11/15/2017 5:55 PM, Chris Wilson wrote:
> Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
>>   #include <drm/drmP.h>
>>   #include <drm/intel-gtt.h>
>> @@ -2149,6 +2150,14 @@ struct i915_perf_stream {
>>           * @oa_config: The OA configuration used by the stream.
>>           */
>>          struct i915_oa_config *oa_config;
>> +
>> +       /**
>> +        * System time correlation variables.
>> +        */
>> +       struct cyclecounter cc;
>> +       spinlock_t systime_lock;
>> +       struct timespec64 start_systime;
>> +       struct timecounter tc;
> This pattern is repeated a lot by struct timecounter users. (I'm still
> trying to understand why the common case is not catered for by a
> convenience timecounter api.)
Yes. Looks good case to change the cyclecounter* member in timecounter 
to cyclecounter.
And then we can avoid separate cyclecounter.
>
>>   };
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 00be015..72ddc34 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -192,6 +192,7 @@
>>    */
>>   
>>   #include <linux/anon_inodes.h>
>> +#include <linux/clocksource.h>
>>   #include <linux/sizes.h>
>>   #include <linux/uuid.h>
>>   
>> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
>>   }
>>   
>>   /**
>> + * i915_cyclecounter_read - read raw cycle/timestamp counter
>> + * @cc: cyclecounter structure
>> + */
>> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
>> +{
>> +       struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc);
>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>> +       u64 ts_count;
>> +
>> +       intel_runtime_pm_get(dev_priv);
>> +       ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
>> +                                   GEN7_TIMESTAMP_UDW);
>> +       intel_runtime_pm_put(dev_priv);
>> +
>> +       return ts_count;
>> +}
>> +
>> +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream)
>> +{
>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>> +       int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
>> +       struct cyclecounter *cc = &stream->cc;
>> +       u32 maxsec;
>> +
>> +       cc->read = i915_cyclecounter_read;
>> +       cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
>> +       maxsec = cc->mask / cs_ts_freq;
>> +
>> +       clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
>> +                              NSEC_PER_SEC, maxsec);
>> +}
>> +
>> +static void i915_perf_init_timecounter(struct i915_perf_stream *stream)
>> +{
>> +#define SYSTIME_START_OFFSET   350000 /* Counter read takes about 350us */
>> +       unsigned long flags;
>> +       u64 ns;
>> +
>> +       i915_perf_init_cyclecounter(stream);
>> +       spin_lock_init(&stream->systime_lock);
>> +
>> +       getnstimeofday64(&stream->start_systime);
>> +       ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET;
> Use ktime directly. Or else Arnd will be back with a patch to fix it.
> (All non-ktime interfaces are effectively deprecated; obsolete for
> drivers.)
Ok. Will update.
>
>> +       spin_lock_irqsave(&stream->systime_lock, flags);
>> +       timecounter_init(&stream->tc, &stream->cc, ns);
>> +       spin_unlock_irqrestore(&stream->systime_lock, flags);
>> +}
>> +
>> +/**
>>    * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
>>    * @stream: A disabled i915 perf stream
>>    *
>> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
>>          /* Allow stream->ops->enable() to refer to this */
>>          stream->enabled = true;
>>   
>> +       i915_perf_init_timecounter(stream);
>> +
>>          if (stream->ops->enable)
>>                  stream->ops->enable(stream);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index cfdf4f8..e7e6966 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -8882,6 +8882,12 @@ enum skl_power_gate {
>>   
>>   /* Gen4+ Timestamp and Pipe Frame time stamp registers */
>>   #define GEN4_TIMESTAMP         _MMIO(0x2358)
>> +#define GEN7_TIMESTAMP_UDW     _MMIO(0x235C)
>> +#define PRE_GEN7_TIMESTAMP_WIDTH       32
>> +#define GEN7_TIMESTAMP_WIDTH           36
>> +#define CS_TIMESTAMP_WIDTH(dev_priv) \
>> +       (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
>> +                                  GEN7_TIMESTAMP_WIDTH)
> s/PRE_GEN7/GEN4/ would be consistent.
> If you really want to add support for earlier, I9XX_.
Yes.
>
> Ok. I can accept the justification, and we are not the only ones who do
> the cyclecounter -> timecounter correction like this.
> -Chris
Lionel Landwerlin Dec. 5, 2017, 1:58 p.m. UTC | #3
On 15/11/17 12:25, Chris Wilson wrote:
> Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
>>   #include <drm/drmP.h>
>>   #include <drm/intel-gtt.h>
>> @@ -2149,6 +2150,14 @@ struct i915_perf_stream {
>>           * @oa_config: The OA configuration used by the stream.
>>           */
>>          struct i915_oa_config *oa_config;
>> +
>> +       /**
>> +        * System time correlation variables.
>> +        */
>> +       struct cyclecounter cc;
>> +       spinlock_t systime_lock;
>> +       struct timespec64 start_systime;
>> +       struct timecounter tc;
> This pattern is repeated a lot by struct timecounter users. (I'm still
> trying to understand why the common case is not catered for by a
> convenience timecounter api.)
>
>>   };
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 00be015..72ddc34 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -192,6 +192,7 @@
>>    */
>>   
>>   #include <linux/anon_inodes.h>
>> +#include <linux/clocksource.h>
>>   #include <linux/sizes.h>
>>   #include <linux/uuid.h>
>>   
>> @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
>>   }
>>   
>>   /**
>> + * i915_cyclecounter_read - read raw cycle/timestamp counter
>> + * @cc: cyclecounter structure
>> + */
>> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
>> +{
>> +       struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc);
>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>> +       u64 ts_count;
>> +
>> +       intel_runtime_pm_get(dev_priv);
>> +       ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
>> +                                   GEN7_TIMESTAMP_UDW);
>> +       intel_runtime_pm_put(dev_priv);
>> +
>> +       return ts_count;
>> +}
>> +
>> +static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream)
>> +{
>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>> +       int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
>> +       struct cyclecounter *cc = &stream->cc;
>> +       u32 maxsec;
>> +
>> +       cc->read = i915_cyclecounter_read;
>> +       cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
>> +       maxsec = cc->mask / cs_ts_freq;
>> +
>> +       clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
>> +                              NSEC_PER_SEC, maxsec);
>> +}
>> +
>> +static void i915_perf_init_timecounter(struct i915_perf_stream *stream)
>> +{
>> +#define SYSTIME_START_OFFSET   350000 /* Counter read takes about 350us */
>> +       unsigned long flags;
>> +       u64 ns;
>> +
>> +       i915_perf_init_cyclecounter(stream);
>> +       spin_lock_init(&stream->systime_lock);
>> +
>> +       getnstimeofday64(&stream->start_systime);
>> +       ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET;
> Use ktime directly. Or else Arnd will be back with a patch to fix it.
> (All non-ktime interfaces are effectively deprecated; obsolete for
> drivers.)
>
>> +       spin_lock_irqsave(&stream->systime_lock, flags);
>> +       timecounter_init(&stream->tc, &stream->cc, ns);
>> +       spin_unlock_irqrestore(&stream->systime_lock, flags);
>> +}
>> +
>> +/**
>>    * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
>>    * @stream: A disabled i915 perf stream
>>    *
>> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct i915_perf_stream *stream)
>>          /* Allow stream->ops->enable() to refer to this */
>>          stream->enabled = true;
>>   
>> +       i915_perf_init_timecounter(stream);
>> +
>>          if (stream->ops->enable)
>>                  stream->ops->enable(stream);
>>   }
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index cfdf4f8..e7e6966 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -8882,6 +8882,12 @@ enum skl_power_gate {
>>   
>>   /* Gen4+ Timestamp and Pipe Frame time stamp registers */
>>   #define GEN4_TIMESTAMP         _MMIO(0x2358)
>> +#define GEN7_TIMESTAMP_UDW     _MMIO(0x235C)
>> +#define PRE_GEN7_TIMESTAMP_WIDTH       32
>> +#define GEN7_TIMESTAMP_WIDTH           36
>> +#define CS_TIMESTAMP_WIDTH(dev_priv) \
>> +       (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
>> +                                  GEN7_TIMESTAMP_WIDTH)
> s/PRE_GEN7/GEN4/ would be consistent.
> If you really want to add support for earlier, I9XX_.

Why not use the RING_TIMESTAMP() RING_TIMESTAMP_UDW() ?
There's already used for the reg_read ioctl.

>
> Ok. I can accept the justification, and we are not the only ones who do
> the cyclecounter -> timecounter correction like this.
> -Chris
>
sagar.a.kamble@intel.com Dec. 6, 2017, 8:17 a.m. UTC | #4
On 12/5/2017 7:28 PM, Lionel Landwerlin wrote:
> On 15/11/17 12:25, Chris Wilson wrote:
>> Quoting Sagar Arun Kamble (2017-11-15 12:13:51)
>>>   #include <drm/drmP.h>
>>>   #include <drm/intel-gtt.h>
>>> @@ -2149,6 +2150,14 @@ struct i915_perf_stream {
>>>           * @oa_config: The OA configuration used by the stream.
>>>           */
>>>          struct i915_oa_config *oa_config;
>>> +
>>> +       /**
>>> +        * System time correlation variables.
>>> +        */
>>> +       struct cyclecounter cc;
>>> +       spinlock_t systime_lock;
>>> +       struct timespec64 start_systime;
>>> +       struct timecounter tc;
>> This pattern is repeated a lot by struct timecounter users. (I'm still
>> trying to understand why the common case is not catered for by a
>> convenience timecounter api.)
>>
>>>   };
>>>     /**
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 00be015..72ddc34 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -192,6 +192,7 @@
>>>    */
>>>     #include <linux/anon_inodes.h>
>>> +#include <linux/clocksource.h>
>>>   #include <linux/sizes.h>
>>>   #include <linux/uuid.h>
>>>   @@ -2391,6 +2392,56 @@ static unsigned int i915_perf_poll(struct 
>>> file *file, poll_table *wait)
>>>   }
>>>     /**
>>> + * i915_cyclecounter_read - read raw cycle/timestamp counter
>>> + * @cc: cyclecounter structure
>>> + */
>>> +static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
>>> +{
>>> +       struct i915_perf_stream *stream = container_of(cc, 
>>> typeof(*stream), cc);
>>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>>> +       u64 ts_count;
>>> +
>>> +       intel_runtime_pm_get(dev_priv);
>>> +       ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
>>> +                                   GEN7_TIMESTAMP_UDW);
>>> +       intel_runtime_pm_put(dev_priv);
>>> +
>>> +       return ts_count;
>>> +}
>>> +
>>> +static void i915_perf_init_cyclecounter(struct i915_perf_stream 
>>> *stream)
>>> +{
>>> +       struct drm_i915_private *dev_priv = stream->dev_priv;
>>> +       int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
>>> +       struct cyclecounter *cc = &stream->cc;
>>> +       u32 maxsec;
>>> +
>>> +       cc->read = i915_cyclecounter_read;
>>> +       cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
>>> +       maxsec = cc->mask / cs_ts_freq;
>>> +
>>> +       clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
>>> +                              NSEC_PER_SEC, maxsec);
>>> +}
>>> +
>>> +static void i915_perf_init_timecounter(struct i915_perf_stream 
>>> *stream)
>>> +{
>>> +#define SYSTIME_START_OFFSET   350000 /* Counter read takes about 
>>> 350us */
>>> +       unsigned long flags;
>>> +       u64 ns;
>>> +
>>> +       i915_perf_init_cyclecounter(stream);
>>> +       spin_lock_init(&stream->systime_lock);
>>> +
>>> +       getnstimeofday64(&stream->start_systime);
>>> +       ns = timespec64_to_ns(&stream->start_systime) + 
>>> SYSTIME_START_OFFSET;
>> Use ktime directly. Or else Arnd will be back with a patch to fix it.
>> (All non-ktime interfaces are effectively deprecated; obsolete for
>> drivers.)
>>
>>> + spin_lock_irqsave(&stream->systime_lock, flags);
>>> +       timecounter_init(&stream->tc, &stream->cc, ns);
>>> +       spin_unlock_irqrestore(&stream->systime_lock, flags);
>>> +}
>>> +
>>> +/**
>>>    * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
>>>    * @stream: A disabled i915 perf stream
>>>    *
>>> @@ -2408,6 +2459,8 @@ static void i915_perf_enable_locked(struct 
>>> i915_perf_stream *stream)
>>>          /* Allow stream->ops->enable() to refer to this */
>>>          stream->enabled = true;
>>>   +       i915_perf_init_timecounter(stream);
>>> +
>>>          if (stream->ops->enable)
>>>                  stream->ops->enable(stream);
>>>   }
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>>> b/drivers/gpu/drm/i915/i915_reg.h
>>> index cfdf4f8..e7e6966 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -8882,6 +8882,12 @@ enum skl_power_gate {
>>>     /* Gen4+ Timestamp and Pipe Frame time stamp registers */
>>>   #define GEN4_TIMESTAMP         _MMIO(0x2358)
>>> +#define GEN7_TIMESTAMP_UDW     _MMIO(0x235C)
>>> +#define PRE_GEN7_TIMESTAMP_WIDTH       32
>>> +#define GEN7_TIMESTAMP_WIDTH           36
>>> +#define CS_TIMESTAMP_WIDTH(dev_priv) \
>>> +       (INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
>>> +                                  GEN7_TIMESTAMP_WIDTH)
>> s/PRE_GEN7/GEN4/ would be consistent.
>> If you really want to add support for earlier, I9XX_.
>
> Why not use the RING_TIMESTAMP() RING_TIMESTAMP_UDW() ?
> There's already used for the reg_read ioctl.

Yes. We can use these.

>
>>
>> Ok. I can accept the justification, and we are not the only ones who do
>> the cyclecounter -> timecounter correction like this.
>> -Chris
>>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2158a75..e08bc85 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,7 @@ 
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/timecounter.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -2149,6 +2150,14 @@  struct i915_perf_stream {
 	 * @oa_config: The OA configuration used by the stream.
 	 */
 	struct i915_oa_config *oa_config;
+
+	/**
+	 * System time correlation variables.
+	 */
+	struct cyclecounter cc;
+	spinlock_t systime_lock;
+	struct timespec64 start_systime;
+	struct timecounter tc;
 };
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 00be015..72ddc34 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@ 
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/clocksource.h>
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
@@ -2391,6 +2392,56 @@  static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
 }
 
 /**
+ * i915_cyclecounter_read - read raw cycle/timestamp counter
+ * @cc: cyclecounter structure
+ */
+static u64 i915_cyclecounter_read(const struct cyclecounter *cc)
+{
+	struct i915_perf_stream *stream = container_of(cc, typeof(*stream), cc);
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+	u64 ts_count;
+
+	intel_runtime_pm_get(dev_priv);
+	ts_count = I915_READ64_2x32(GEN4_TIMESTAMP,
+				    GEN7_TIMESTAMP_UDW);
+	intel_runtime_pm_put(dev_priv);
+
+	return ts_count;
+}
+
+static void i915_perf_init_cyclecounter(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+	int cs_ts_freq = dev_priv->perf.oa.timestamp_frequency;
+	struct cyclecounter *cc = &stream->cc;
+	u32 maxsec;
+
+	cc->read = i915_cyclecounter_read;
+	cc->mask = CYCLECOUNTER_MASK(CS_TIMESTAMP_WIDTH(dev_priv));
+	maxsec = cc->mask / cs_ts_freq;
+
+	clocks_calc_mult_shift(&cc->mult, &cc->shift, cs_ts_freq,
+			       NSEC_PER_SEC, maxsec);
+}
+
+static void i915_perf_init_timecounter(struct i915_perf_stream *stream)
+{
+#define SYSTIME_START_OFFSET	350000 /* Counter read takes about 350us */
+	unsigned long flags;
+	u64 ns;
+
+	i915_perf_init_cyclecounter(stream);
+	spin_lock_init(&stream->systime_lock);
+
+	getnstimeofday64(&stream->start_systime);
+	ns = timespec64_to_ns(&stream->start_systime) + SYSTIME_START_OFFSET;
+
+	spin_lock_irqsave(&stream->systime_lock, flags);
+	timecounter_init(&stream->tc, &stream->cc, ns);
+	spin_unlock_irqrestore(&stream->systime_lock, flags);
+}
+
+/**
  * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
  * @stream: A disabled i915 perf stream
  *
@@ -2408,6 +2459,8 @@  static void i915_perf_enable_locked(struct i915_perf_stream *stream)
 	/* Allow stream->ops->enable() to refer to this */
 	stream->enabled = true;
 
+	i915_perf_init_timecounter(stream);
+
 	if (stream->ops->enable)
 		stream->ops->enable(stream);
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cfdf4f8..e7e6966 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8882,6 +8882,12 @@  enum skl_power_gate {
 
 /* Gen4+ Timestamp and Pipe Frame time stamp registers */
 #define GEN4_TIMESTAMP		_MMIO(0x2358)
+#define GEN7_TIMESTAMP_UDW	_MMIO(0x235C)
+#define PRE_GEN7_TIMESTAMP_WIDTH	32
+#define GEN7_TIMESTAMP_WIDTH		36
+#define CS_TIMESTAMP_WIDTH(dev_priv) \
+	(INTEL_GEN(dev_priv) < 7 ? PRE_GEN7_TIMESTAMP_WIDTH : \
+				   GEN7_TIMESTAMP_WIDTH)
 #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
 #define IVB_TIMESTAMP_CTR	_MMIO(0x44070)