[1/2] drm/i915/guc: Add GuC Load time to debugfs
diff mbox

Message ID 20170907003740.3016-1-anusha.srivatsa@intel.com
State New
Headers show

Commit Message

Srivatsa, Anusha Sept. 7, 2017, 12:37 a.m. UTC
Calculate the time that GuC takes to load.
This information could be very useful in
determining if GuC is taking unreasonably long time
to load in a certain platforms.

v2: Calculate time before logs are collected.
Move the guc_load_time variable as a part of
intel_uc_fw struct. Store only final result
which is to be exported to debugfs. (Michal)
Add the load time in the print message as well.

Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
 drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
 drivers/gpu/drm/i915/intel_uc.h         | 1 +
 3 files changed, 12 insertions(+)

Comments

Tvrtko Ursulin Sept. 7, 2017, 8:49 a.m. UTC | #1
On 07/09/2017 01:37, Anusha Srivatsa wrote:
> Calculate the time that GuC takes to load.
> This information could be very useful in
> determining if GuC is taking unreasonably long time
> to load in a certain platforms.

Do we need this in debugfs or a DRM_NOTE or something would be 
sufficient if the load time is above certain threshold?

Also, what are the typical times here? Are jiffies precise enough? Could 
be only 10ms granularity on some kernels.

Depending on the above, more or less applicable comments below:

> v2: Calculate time before logs are collected.
> Move the guc_load_time variable as a part of
> intel_uc_fw struct. Store only final result
> which is to be exported to debugfs. (Michal)
> Add the load time in the print message as well.
> 
> Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
>   drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
>   drivers/gpu/drm/i915/intel_uc.h         | 1 +
>   3 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 48572b157222..e0b99dbc6608 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>   		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
>   	seq_printf(m, "\tversion found: %d.%d\n",
>   		guc_fw->major_ver_found, guc_fw->minor_ver_found);
> +	seq_printf(m, "\tGuC Load time is %lu ms\n",
> +		   jiffies_to_msecs(guc_fw->guc_load_time));

OCD: "GuC load time: %lums" to make it more consistent with the other 
entries here?

> +
>   	seq_printf(m, "\theader: offset is %d; size = %d\n",
>   		guc_fw->header_offset, guc_fw->header_size);
>   	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
> diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> index 8b0ae7fce7f2..da917f84c471 100644
> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
>   	struct sg_table *sg = vma->pages;
>   	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
>   	int i, ret = 0;
> +	unsigned long guc_start_load, guc_finish_load;
>   
>   	/* where RSA signature starts */
>   	offset = guc_fw->rsa_offset;
> @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
>   
>   	/* Finally start the DMA */
>   	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> +	guc_start_load = jiffies;
>   
>   	/*
>   	 * Wait for the DMA to complete & the GuC to start up.
> @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
>   	 */
>   	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
>   
> +	guc_finish_load = jiffies;
> +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;

Strictly speaking you don't need the guc_finish_load local.

> +
>   	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
>   			I915_READ(DMA_CTRL), status);
>   
> @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)
>   		 guc->fw.path,
>   		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
>   
> +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
> +			 guc->fw.guc_load_time);
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> index 22ae52b17b0f..52aa05d13863 100644
> --- a/drivers/gpu/drm/i915/intel_uc.h
> +++ b/drivers/gpu/drm/i915/intel_uc.h
> @@ -154,6 +154,7 @@ struct intel_uc_fw {
>   	uint32_t rsa_offset;
>   	uint32_t ucode_size;
>   	uint32_t ucode_offset;
> +	unsigned long guc_load_time;

Looks wrong to add guc_ (and later huc_) prefixed members in the common 
struct since both intel_guc and intel_huc encapsulate it. If you just 
had a single field and called it load_time, wouldn't you get separate 
copies for guc and huc automatically?

Regards,

Tvrtko

>   };
>   
>   struct intel_guc_log {
>
Michal Wajdeczko Sept. 7, 2017, 10:23 a.m. UTC | #2
On Thu, Sep 07, 2017 at 09:49:02AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/09/2017 01:37, Anusha Srivatsa wrote:
> > Calculate the time that GuC takes to load.
> > This information could be very useful in
> > determining if GuC is taking unreasonably long time
> > to load in a certain platforms.
> 
> Do we need this in debugfs or a DRM_NOTE or something would be sufficient if
> the load time is above certain threshold?
> 
> Also, what are the typical times here? Are jiffies precise enough? Could be
> only 10ms granularity on some kernels.
> 
> Depending on the above, more or less applicable comments below:
> 
> > v2: Calculate time before logs are collected.
> > Move the guc_load_time variable as a part of
> > intel_uc_fw struct. Store only final result
> > which is to be exported to debugfs. (Michal)
> > Add the load time in the print message as well.
> > 
> > Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
> >   drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
> >   drivers/gpu/drm/i915/intel_uc.h         | 1 +
> >   3 files changed, 12 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 48572b157222..e0b99dbc6608 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
> >   		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
> >   	seq_printf(m, "\tversion found: %d.%d\n",
> >   		guc_fw->major_ver_found, guc_fw->minor_ver_found);
> > +	seq_printf(m, "\tGuC Load time is %lu ms\n",
> > +		   jiffies_to_msecs(guc_fw->guc_load_time));
> 
> OCD: "GuC load time: %lums" to make it more consistent with the other
> entries here?

We can also skip "Guc" prefix as all entries are under "GuC firmware status" 
And maybe better place will be to insert it right after existing "load" entry?
Then output will be:

	GuC firmware status:
		path: i915/xxx.bin
		fetch: SUCCESS
		load: SUCCESS
		load time: 10ms
		...

or we can even try to combine both entries:

	GuC firmware status:
		path: i915/xxx.bin
		fetch: SUCCESS
		load: SUCCESS 10ms
		...

> 
> > +
> >   	seq_printf(m, "\theader: offset is %d; size = %d\n",
> >   		guc_fw->header_offset, guc_fw->header_size);
> >   	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
> > diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> > index 8b0ae7fce7f2..da917f84c471 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> > @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	struct sg_table *sg = vma->pages;
> >   	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
> >   	int i, ret = 0;
> > +	unsigned long guc_start_load, guc_finish_load;
> >   	/* where RSA signature starts */
> >   	offset = guc_fw->rsa_offset;
> > @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	/* Finally start the DMA */
> >   	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> > +	guc_start_load = jiffies;
> >   	/*
> >   	 * Wait for the DMA to complete & the GuC to start up.
> > @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	 */
> >   	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
> > +	guc_finish_load = jiffies;
> > +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;
> 
> Strictly speaking you don't need the guc_finish_load local.
> 
> > +
> >   	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
> >   			I915_READ(DMA_CTRL), status);
> > @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)
> >   		 guc->fw.path,
> >   		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
> > +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
> > +			 guc->fw.guc_load_time);
> > +
> >   	return 0;
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> > index 22ae52b17b0f..52aa05d13863 100644
> > --- a/drivers/gpu/drm/i915/intel_uc.h
> > +++ b/drivers/gpu/drm/i915/intel_uc.h
> > @@ -154,6 +154,7 @@ struct intel_uc_fw {
> >   	uint32_t rsa_offset;
> >   	uint32_t ucode_size;
> >   	uint32_t ucode_offset;
> > +	unsigned long guc_load_time;
> 
> Looks wrong to add guc_ (and later huc_) prefixed members in the common
> struct since both intel_guc and intel_huc encapsulate it. If you just had a
> single field and called it load_time, wouldn't you get separate copies for
> guc and huc automatically?

Additionally, move this new member closer to related "load_status" member.

Regards,
Michal

> 
> Regards,
> 
> Tvrtko
> 
> >   };
> >   struct intel_guc_log {
> > 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Srivatsa, Anusha Sept. 7, 2017, 5:07 p.m. UTC | #3
>-----Original Message-----
>From: Wajdeczko, Michal
>Sent: Thursday, September 7, 2017 3:23 AM
>To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>Cc: Srivatsa, Anusha <anusha.srivatsa@intel.com>; intel-
>gfx@lists.freedesktop.org; Sundaresan, Sujaritha
><sujaritha.sundaresan@intel.com>
>Subject: Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Add GuC Load time to debugfs
>
>On Thu, Sep 07, 2017 at 09:49:02AM +0100, Tvrtko Ursulin wrote:
>>
>> On 07/09/2017 01:37, Anusha Srivatsa wrote:
>> > Calculate the time that GuC takes to load.
>> > This information could be very useful in determining if GuC is
>> > taking unreasonably long time to load in a certain platforms.
>>
>> Do we need this in debugfs or a DRM_NOTE or something would be
>> sufficient if the load time is above certain threshold?
>>
>> Also, what are the typical times here? Are jiffies precise enough?
>> Could be only 10ms granularity on some kernels.
>>
>> Depending on the above, more or less applicable comments below:
>>
>> > v2: Calculate time before logs are collected.
>> > Move the guc_load_time variable as a part of intel_uc_fw struct.
>> > Store only final result which is to be exported to debugfs. (Michal)
>> > Add the load time in the print message as well.
>> >
>> > Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
>> > Cc: Oscar Mateo <oscar.mateo@intel.com>
>> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>> > Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
>> > ---
>> >   drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
>> >   drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
>> >   drivers/gpu/drm/i915/intel_uc.h         | 1 +
>> >   3 files changed, 12 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> > b/drivers/gpu/drm/i915/i915_debugfs.c
>> > index 48572b157222..e0b99dbc6608 100644
>> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> > @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file
>*m, void *data)
>> >   		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
>> >   	seq_printf(m, "\tversion found: %d.%d\n",
>> >   		guc_fw->major_ver_found, guc_fw->minor_ver_found);
>> > +	seq_printf(m, "\tGuC Load time is %lu ms\n",
>> > +		   jiffies_to_msecs(guc_fw->guc_load_time));
>>
>> OCD: "GuC load time: %lums" to make it more consistent with the other
>> entries here?

I don't have a strong preference, either way works. I can change it in the next revision.

>We can also skip "Guc" prefix as all entries are under "GuC firmware status"
>And maybe better place will be to insert it right after existing "load" entry?
>Then output will be:
>
>	GuC firmware status:
>		path: i915/xxx.bin
>		fetch: SUCCESS
>		load: SUCCESS
>		load time: 10ms
>		...
>
>or we can even try to combine both entries:
>
>	GuC firmware status:
>		path: i915/xxx.bin
>		fetch: SUCCESS
>		load: SUCCESS 10ms
>		...
>
I agree with removing the GuC_ prefix. I like the first output more somehow.... it tells status, and then the time taken for load..... 

Thanks Michal for your inputs....

Anusha
>> > +
>> >   	seq_printf(m, "\theader: offset is %d; size = %d\n",
>> >   		guc_fw->header_offset, guc_fw->header_size);
>> >   	seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git
>> > a/drivers/gpu/drm/i915/intel_guc_loader.c
>> > b/drivers/gpu/drm/i915/intel_guc_loader.c
>> > index 8b0ae7fce7f2..da917f84c471 100644
>> > --- a/drivers/gpu/drm/i915/intel_guc_loader.c
>> > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>> > @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct
>drm_i915_private *dev_priv,
>> >   	struct sg_table *sg = vma->pages;
>> >   	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
>> >   	int i, ret = 0;
>> > +	unsigned long guc_start_load, guc_finish_load;
>> >   	/* where RSA signature starts */
>> >   	offset = guc_fw->rsa_offset;
>> > @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct
>drm_i915_private *dev_priv,
>> >   	/* Finally start the DMA */
>> >   	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE |
>START_DMA));
>> > +	guc_start_load = jiffies;
>> >   	/*
>> >   	 * Wait for the DMA to complete & the GuC to start up.
>> > @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct
>drm_i915_private *dev_priv,
>> >   	 */
>> >   	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
>> > +	guc_finish_load = jiffies;
>> > +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;
>>
>> Strictly speaking you don't need the guc_finish_load local.
>>
>> > +
>> >   	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
>> >   			I915_READ(DMA_CTRL), status);
>> > @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)
>> >   		 guc->fw.path,
>> >   		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
>> > +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
>> > +			 guc->fw.guc_load_time);
>> > +
>> >   	return 0;
>> >   }
>> > diff --git a/drivers/gpu/drm/i915/intel_uc.h
>> > b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b17b0f..52aa05d13863
>> > 100644
>> > --- a/drivers/gpu/drm/i915/intel_uc.h
>> > +++ b/drivers/gpu/drm/i915/intel_uc.h
>> > @@ -154,6 +154,7 @@ struct intel_uc_fw {
>> >   	uint32_t rsa_offset;
>> >   	uint32_t ucode_size;
>> >   	uint32_t ucode_offset;
>> > +	unsigned long guc_load_time;
>>
>> Looks wrong to add guc_ (and later huc_) prefixed members in the
>> common struct since both intel_guc and intel_huc encapsulate it. If
>> you just had a single field and called it load_time, wouldn't you get
>> separate copies for guc and huc automatically?
>
>Additionally, move this new member closer to related "load_status" member.
>
>Regards,
>Michal
>
>>
>> Regards,
>>
>> Tvrtko
>>
>> >   };
>> >   struct intel_guc_log {
>> >
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Srivatsa, Anusha Sept. 7, 2017, 5:07 p.m. UTC | #4
>-----Original Message-----

>From: Tvrtko Ursulin [mailto:tvrtko.ursulin@linux.intel.com]

>Sent: Thursday, September 7, 2017 1:49 AM

>To: Srivatsa, Anusha <anusha.srivatsa@intel.com>; intel-

>gfx@lists.freedesktop.org

>Cc: Sundaresan, Sujaritha <sujaritha.sundaresan@intel.com>

>Subject: Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Add GuC Load time to debugfs

>

>

>On 07/09/2017 01:37, Anusha Srivatsa wrote:

>> Calculate the time that GuC takes to load.

>> This information could be very useful in determining if GuC is taking

>> unreasonably long time to load in a certain platforms.

>

>Do we need this in debugfs or a DRM_NOTE or something would be sufficient if

>the load time is above certain threshold?


The intention was to have debug related info in a debugfs. DRM_NOTE will also be useful, we can do both. The load time can be as an entry in debugfs and if it's beyond a threshold - 20ms I assume, we can have a DRM_NOTE.
Does that sound good?

>Also, what are the typical times here? Are jiffies precise enough? Could be only

>10ms granularity on some kernels.


Usually guc load times are around 8ms to 10ms....
That’s a very good point. I thought jiffies are a good approach for this purpose, but if there is a better or more accurate way, I will be happy to go that direction. 

>Depending on the above, more or less applicable comments below:

>

>> v2: Calculate time before logs are collected.

>> Move the guc_load_time variable as a part of intel_uc_fw struct. Store

>> only final result which is to be exported to debugfs. (Michal) Add the

>> load time in the print message as well.

>>

>> Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>

>> Cc: Oscar Mateo <oscar.mateo@intel.com>

>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>

>> Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>

>> ---

>>   drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++

>>   drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++

>>   drivers/gpu/drm/i915/intel_uc.h         | 1 +

>>   3 files changed, 12 insertions(+)

>>

>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c

>> b/drivers/gpu/drm/i915/i915_debugfs.c

>> index 48572b157222..e0b99dbc6608 100644

>> --- a/drivers/gpu/drm/i915/i915_debugfs.c

>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c

>> @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file

>*m, void *data)

>>   		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);

>>   	seq_printf(m, "\tversion found: %d.%d\n",

>>   		guc_fw->major_ver_found, guc_fw->minor_ver_found);

>> +	seq_printf(m, "\tGuC Load time is %lu ms\n",

>> +		   jiffies_to_msecs(guc_fw->guc_load_time));

>

>OCD: "GuC load time: %lums" to make it more consistent with the other entries

>here?

>

>> +

>>   	seq_printf(m, "\theader: offset is %d; size = %d\n",

>>   		guc_fw->header_offset, guc_fw->header_size);

>>   	seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git

>> a/drivers/gpu/drm/i915/intel_guc_loader.c

>> b/drivers/gpu/drm/i915/intel_guc_loader.c

>> index 8b0ae7fce7f2..da917f84c471 100644

>> --- a/drivers/gpu/drm/i915/intel_guc_loader.c

>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c

>> @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private

>*dev_priv,

>>   	struct sg_table *sg = vma->pages;

>>   	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];

>>   	int i, ret = 0;

>> +	unsigned long guc_start_load, guc_finish_load;

>>

>>   	/* where RSA signature starts */

>>   	offset = guc_fw->rsa_offset;

>> @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct

>> drm_i915_private *dev_priv,

>>

>>   	/* Finally start the DMA */

>>   	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE |

>START_DMA));

>> +	guc_start_load = jiffies;

>>

>>   	/*

>>   	 * Wait for the DMA to complete & the GuC to start up.

>> @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct drm_i915_private

>*dev_priv,

>>   	 */

>>   	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);

>>

>> +	guc_finish_load = jiffies;

>> +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;

>

>Strictly speaking you don't need the guc_finish_load local.

>

>> +

>>   	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",

>>   			I915_READ(DMA_CTRL), status);

>>

>> @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)

>>   		 guc->fw.path,

>>   		 guc->fw.major_ver_found, guc->fw.minor_ver_found);

>>

>> +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",

>> +			 guc->fw.guc_load_time);

>> +

>>   	return 0;

>>   }

>>

>> diff --git a/drivers/gpu/drm/i915/intel_uc.h

>> b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b17b0f..52aa05d13863

>> 100644

>> --- a/drivers/gpu/drm/i915/intel_uc.h

>> +++ b/drivers/gpu/drm/i915/intel_uc.h

>> @@ -154,6 +154,7 @@ struct intel_uc_fw {

>>   	uint32_t rsa_offset;

>>   	uint32_t ucode_size;

>>   	uint32_t ucode_offset;

>> +	unsigned long guc_load_time;

>

>Looks wrong to add guc_ (and later huc_) prefixed members in the common

>struct since both intel_guc and intel_huc encapsulate it. If you just had a single

>field and called it load_time, wouldn't you get separate copies for guc and huc

>automatically?


That’s a good point. 
Neater approach too. Thanks Tvrtko!

Anusha
>Regards,

>

>Tvrtko

>

>>   };

>>

>>   struct intel_guc_log {

>>
Chris Wilson Sept. 7, 2017, 10:08 p.m. UTC | #5
Quoting Tvrtko Ursulin (2017-09-07 09:49:02)
> 
> On 07/09/2017 01:37, Anusha Srivatsa wrote:
> > Calculate the time that GuC takes to load.
> > This information could be very useful in
> > determining if GuC is taking unreasonably long time
> > to load in a certain platforms.
> 
> Do we need this in debugfs or a DRM_NOTE or something would be 
> sufficient if the load time is above certain threshold?
> 
> Also, what are the typical times here? Are jiffies precise enough? Could 
> be only 10ms granularity on some kernels.

Hear, hear. Aiming for jiffie resolution sets a very low bar.

Why is this important anyway? It shouldn't be on the critical path for
bringing up userspace (i.e. on the synchronous portion of boot/module
loading). Nor should it be delaying bringing up a screen, once the core
is initialised we should be able to bring up GT and the display in
parallel.
-Chris
Daniel Vetter Sept. 8, 2017, 7:17 a.m. UTC | #6
On Thu, Sep 07, 2017 at 09:49:02AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/09/2017 01:37, Anusha Srivatsa wrote:
> > Calculate the time that GuC takes to load.
> > This information could be very useful in
> > determining if GuC is taking unreasonably long time
> > to load in a certain platforms.
> 
> Do we need this in debugfs or a DRM_NOTE or something would be sufficient if
> the load time is above certain threshold?

Yeah I'd vote for dmesg logging too.
-Daniel

> 
> Also, what are the typical times here? Are jiffies precise enough? Could be
> only 10ms granularity on some kernels.
> 
> Depending on the above, more or less applicable comments below:
> 
> > v2: Calculate time before logs are collected.
> > Move the guc_load_time variable as a part of
> > intel_uc_fw struct. Store only final result
> > which is to be exported to debugfs. (Michal)
> > Add the load time in the print message as well.
> > 
> > Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
> >   drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
> >   drivers/gpu/drm/i915/intel_uc.h         | 1 +
> >   3 files changed, 12 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> > index 48572b157222..e0b99dbc6608 100644
> > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
> >   		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
> >   	seq_printf(m, "\tversion found: %d.%d\n",
> >   		guc_fw->major_ver_found, guc_fw->minor_ver_found);
> > +	seq_printf(m, "\tGuC Load time is %lu ms\n",
> > +		   jiffies_to_msecs(guc_fw->guc_load_time));
> 
> OCD: "GuC load time: %lums" to make it more consistent with the other
> entries here?
> 
> > +
> >   	seq_printf(m, "\theader: offset is %d; size = %d\n",
> >   		guc_fw->header_offset, guc_fw->header_size);
> >   	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
> > diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
> > index 8b0ae7fce7f2..da917f84c471 100644
> > --- a/drivers/gpu/drm/i915/intel_guc_loader.c
> > +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
> > @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	struct sg_table *sg = vma->pages;
> >   	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
> >   	int i, ret = 0;
> > +	unsigned long guc_start_load, guc_finish_load;
> >   	/* where RSA signature starts */
> >   	offset = guc_fw->rsa_offset;
> > @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	/* Finally start the DMA */
> >   	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
> > +	guc_start_load = jiffies;
> >   	/*
> >   	 * Wait for the DMA to complete & the GuC to start up.
> > @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
> >   	 */
> >   	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
> > +	guc_finish_load = jiffies;
> > +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;
> 
> Strictly speaking you don't need the guc_finish_load local.
> 
> > +
> >   	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
> >   			I915_READ(DMA_CTRL), status);
> > @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)
> >   		 guc->fw.path,
> >   		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
> > +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
> > +			 guc->fw.guc_load_time);
> > +
> >   	return 0;
> >   }
> > diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
> > index 22ae52b17b0f..52aa05d13863 100644
> > --- a/drivers/gpu/drm/i915/intel_uc.h
> > +++ b/drivers/gpu/drm/i915/intel_uc.h
> > @@ -154,6 +154,7 @@ struct intel_uc_fw {
> >   	uint32_t rsa_offset;
> >   	uint32_t ucode_size;
> >   	uint32_t ucode_offset;
> > +	unsigned long guc_load_time;
> 
> Looks wrong to add guc_ (and later huc_) prefixed members in the common
> struct since both intel_guc and intel_huc encapsulate it. If you just had a
> single field and called it load_time, wouldn't you get separate copies for
> guc and huc automatically?
> 
> Regards,
> 
> Tvrtko
> 
> >   };
> >   struct intel_guc_log {
> > 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Srivatsa, Anusha Sept. 8, 2017, 5:58 p.m. UTC | #7
>-----Original Message-----

>From: Chris Wilson [mailto:chris@chris-wilson.co.uk]

>Sent: Thursday, September 7, 2017 3:08 PM

>To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>; Srivatsa, Anusha

><anusha.srivatsa@intel.com>; intel-gfx@lists.freedesktop.org

>Cc: Sundaresan, Sujaritha <sujaritha.sundaresan@intel.com>

>Subject: Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Add GuC Load time to debugfs

>

>Quoting Tvrtko Ursulin (2017-09-07 09:49:02)

>>

>> On 07/09/2017 01:37, Anusha Srivatsa wrote:

>> > Calculate the time that GuC takes to load.

>> > This information could be very useful in determining if GuC is

>> > taking unreasonably long time to load in a certain platforms.

>>

>> Do we need this in debugfs or a DRM_NOTE or something would be

>> sufficient if the load time is above certain threshold?

>>

>> Also, what are the typical times here? Are jiffies precise enough?

>> Could be only 10ms granularity on some kernels.

>

>Hear, hear. Aiming for jiffie resolution sets a very low bar.

>

>Why is this important anyway? It shouldn't be on the critical path for bringing up

>userspace (i.e. on the synchronous portion of boot/module loading). Nor should

>it be delaying bringing up a screen, once the core is initialised we should be able

>to bring up GT and the display in parallel.


Hi Chris,
Wont it still be of some value to know the time taken to load the firmware in different platforms? To have some data to tell how much time it takes to load?

Anusha
>-Chris
Chris Wilson Sept. 8, 2017, 6:04 p.m. UTC | #8
Quoting Srivatsa, Anusha (2017-09-08 18:58:11)
> 
> 
> >-----Original Message-----
> >From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> >Sent: Thursday, September 7, 2017 3:08 PM
> >To: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>; Srivatsa, Anusha
> ><anusha.srivatsa@intel.com>; intel-gfx@lists.freedesktop.org
> >Cc: Sundaresan, Sujaritha <sujaritha.sundaresan@intel.com>
> >Subject: Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Add GuC Load time to debugfs
> >
> >Quoting Tvrtko Ursulin (2017-09-07 09:49:02)
> >>
> >> On 07/09/2017 01:37, Anusha Srivatsa wrote:
> >> > Calculate the time that GuC takes to load.
> >> > This information could be very useful in determining if GuC is
> >> > taking unreasonably long time to load in a certain platforms.
> >>
> >> Do we need this in debugfs or a DRM_NOTE or something would be
> >> sufficient if the load time is above certain threshold?
> >>
> >> Also, what are the typical times here? Are jiffies precise enough?
> >> Could be only 10ms granularity on some kernels.
> >
> >Hear, hear. Aiming for jiffie resolution sets a very low bar.
> >
> >Why is this important anyway? It shouldn't be on the critical path for bringing up
> >userspace (i.e. on the synchronous portion of boot/module loading). Nor should
> >it be delaying bringing up a screen, once the core is initialised we should be able
> >to bring up GT and the display in parallel.
> 
> Hi Chris,
> Wont it still be of some value to know the time taken to load the firmware in different platforms? To have some data to tell how much time it takes to load?

If it always reports 0 jiffies that is not much use. Why do you need to
add code to do this since you can just log the function call timings?
-Chris
Tvrtko Ursulin Sept. 11, 2017, 3:53 p.m. UTC | #9
On 07/09/2017 18:07, Srivatsa, Anusha wrote:
>> -----Original Message-----
>> From: Tvrtko Ursulin [mailto:tvrtko.ursulin@linux.intel.com]
>> Sent: Thursday, September 7, 2017 1:49 AM
>> To: Srivatsa, Anusha <anusha.srivatsa@intel.com>; intel-
>> gfx@lists.freedesktop.org
>> Cc: Sundaresan, Sujaritha <sujaritha.sundaresan@intel.com>
>> Subject: Re: [Intel-gfx] [PATCH 1/2] drm/i915/guc: Add GuC Load time to debugfs
>>
>>
>> On 07/09/2017 01:37, Anusha Srivatsa wrote:
>>> Calculate the time that GuC takes to load.
>>> This information could be very useful in determining if GuC is taking
>>> unreasonably long time to load in a certain platforms.
>>
>> Do we need this in debugfs or a DRM_NOTE or something would be sufficient if
>> the load time is above certain threshold?
> 
> The intention was to have debug related info in a debugfs. DRM_NOTE will also be useful, we can do both. The load time can be as an entry in debugfs and if it's beyond a threshold - 20ms I assume, we can have a DRM_NOTE.
> Does that sound good?

I'd rather avoid adding code (for debugfs) if the only purpose is to 
detect when the load takes too long. But maybe there is a different 
reason to have it, which I am not aware off so I don't know.

>> Also, what are the typical times here? Are jiffies precise enough? Could be only
>> 10ms granularity on some kernels.
> 
> Usually guc load times are around 8ms to 10ms....
> That’s a very good point. I thought jiffies are a good approach for this purpose, but if there is a better or more accurate way, I will be happy to go that direction.

ktime_get(_ns) I think.

Regards,

Tvrtko

>> Depending on the above, more or less applicable comments below:
>>
>>> v2: Calculate time before logs are collected.
>>> Move the guc_load_time variable as a part of intel_uc_fw struct. Store
>>> only final result which is to be exported to debugfs. (Michal) Add the
>>> load time in the print message as well.
>>>
>>> Cc: Sujaritha Sundaresan <sujaritha.sundaresan@intel.com>
>>> Cc: Oscar Mateo <oscar.mateo@intel.com>
>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>> Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_debugfs.c     | 3 +++
>>>    drivers/gpu/drm/i915/intel_guc_loader.c | 8 ++++++++
>>>    drivers/gpu/drm/i915/intel_uc.h         | 1 +
>>>    3 files changed, 12 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 48572b157222..e0b99dbc6608 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2379,6 +2379,9 @@ static int i915_guc_load_status_info(struct seq_file
>> *m, void *data)
>>>    		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
>>>    	seq_printf(m, "\tversion found: %d.%d\n",
>>>    		guc_fw->major_ver_found, guc_fw->minor_ver_found);
>>> +	seq_printf(m, "\tGuC Load time is %lu ms\n",
>>> +		   jiffies_to_msecs(guc_fw->guc_load_time));
>>
>> OCD: "GuC load time: %lums" to make it more consistent with the other entries
>> here?
>>
>>> +
>>>    	seq_printf(m, "\theader: offset is %d; size = %d\n",
>>>    		guc_fw->header_offset, guc_fw->header_size);
>>>    	seq_printf(m, "\tuCode: offset is %d; size = %d\n", diff --git
>>> a/drivers/gpu/drm/i915/intel_guc_loader.c
>>> b/drivers/gpu/drm/i915/intel_guc_loader.c
>>> index 8b0ae7fce7f2..da917f84c471 100644
>>> --- a/drivers/gpu/drm/i915/intel_guc_loader.c
>>> +++ b/drivers/gpu/drm/i915/intel_guc_loader.c
>>> @@ -199,6 +199,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private
>> *dev_priv,
>>>    	struct sg_table *sg = vma->pages;
>>>    	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
>>>    	int i, ret = 0;
>>> +	unsigned long guc_start_load, guc_finish_load;
>>>
>>>    	/* where RSA signature starts */
>>>    	offset = guc_fw->rsa_offset;
>>> @@ -226,6 +227,7 @@ static int guc_ucode_xfer_dma(struct
>>> drm_i915_private *dev_priv,
>>>
>>>    	/* Finally start the DMA */
>>>    	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE |
>> START_DMA));
>>> +	guc_start_load = jiffies;
>>>
>>>    	/*
>>>    	 * Wait for the DMA to complete & the GuC to start up.
>>> @@ -237,6 +239,9 @@ static int guc_ucode_xfer_dma(struct drm_i915_private
>> *dev_priv,
>>>    	 */
>>>    	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
>>>
>>> +	guc_finish_load = jiffies;
>>> +	guc_fw->guc_load_time = guc_finish_load - guc_start_load;
>>
>> Strictly speaking you don't need the guc_finish_load local.
>>
>>> +
>>>    	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
>>>    			I915_READ(DMA_CTRL), status);
>>>
>>> @@ -372,6 +377,9 @@ int intel_guc_init_hw(struct intel_guc *guc)
>>>    		 guc->fw.path,
>>>    		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
>>>
>>> +	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
>>> +			 guc->fw.guc_load_time);
>>> +
>>>    	return 0;
>>>    }
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_uc.h
>>> b/drivers/gpu/drm/i915/intel_uc.h index 22ae52b17b0f..52aa05d13863
>>> 100644
>>> --- a/drivers/gpu/drm/i915/intel_uc.h
>>> +++ b/drivers/gpu/drm/i915/intel_uc.h
>>> @@ -154,6 +154,7 @@ struct intel_uc_fw {
>>>    	uint32_t rsa_offset;
>>>    	uint32_t ucode_size;
>>>    	uint32_t ucode_offset;
>>> +	unsigned long guc_load_time;
>>
>> Looks wrong to add guc_ (and later huc_) prefixed members in the common
>> struct since both intel_guc and intel_huc encapsulate it. If you just had a single
>> field and called it load_time, wouldn't you get separate copies for guc and huc
>> automatically?
> 
> That’s a good point.
> Neater approach too. Thanks Tvrtko!
> 
> Anusha
>> Regards,
>>
>> Tvrtko
>>
>>>    };
>>>
>>>    struct intel_guc_log {
>>>

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 48572b157222..e0b99dbc6608 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2379,6 +2379,9 @@  static int i915_guc_load_status_info(struct seq_file *m, void *data)
 		guc_fw->major_ver_wanted, guc_fw->minor_ver_wanted);
 	seq_printf(m, "\tversion found: %d.%d\n",
 		guc_fw->major_ver_found, guc_fw->minor_ver_found);
+	seq_printf(m, "\tGuC Load time is %lu ms\n",
+		   jiffies_to_msecs(guc_fw->guc_load_time));
+
 	seq_printf(m, "\theader: offset is %d; size = %d\n",
 		guc_fw->header_offset, guc_fw->header_size);
 	seq_printf(m, "\tuCode: offset is %d; size = %d\n",
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 8b0ae7fce7f2..da917f84c471 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -199,6 +199,7 @@  static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
 	struct sg_table *sg = vma->pages;
 	u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT];
 	int i, ret = 0;
+	unsigned long guc_start_load, guc_finish_load;
 
 	/* where RSA signature starts */
 	offset = guc_fw->rsa_offset;
@@ -226,6 +227,7 @@  static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
 
 	/* Finally start the DMA */
 	I915_WRITE(DMA_CTRL, _MASKED_BIT_ENABLE(UOS_MOVE | START_DMA));
+	guc_start_load = jiffies;
 
 	/*
 	 * Wait for the DMA to complete & the GuC to start up.
@@ -237,6 +239,9 @@  static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv,
 	 */
 	ret = wait_for(guc_ucode_response(dev_priv, &status), 100);
 
+	guc_finish_load = jiffies;
+	guc_fw->guc_load_time = guc_finish_load - guc_start_load;
+
 	DRM_DEBUG_DRIVER("DMA status 0x%x, GuC status 0x%x\n",
 			I915_READ(DMA_CTRL), status);
 
@@ -372,6 +377,9 @@  int intel_guc_init_hw(struct intel_guc *guc)
 		 guc->fw.path,
 		 guc->fw.major_ver_found, guc->fw.minor_ver_found);
 
+	DRM_DEBUG_DRIVER("Time taken to load GuC is %lu\n",
+			 guc->fw.guc_load_time);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 22ae52b17b0f..52aa05d13863 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -154,6 +154,7 @@  struct intel_uc_fw {
 	uint32_t rsa_offset;
 	uint32_t ucode_size;
 	uint32_t ucode_offset;
+	unsigned long guc_load_time;
 };
 
 struct intel_guc_log {