
[v3,2/8] drm/panfrost: Enable cycle counter register upon job submission

Message ID 20230905184533.959171-3-adrian.larumbe@collabora.com (mailing list archive)
State New, archived
Series Add fdinfo support to Panfrost

Commit Message

Adrián Larumbe Sept. 5, 2023, 6:45 p.m. UTC
In a future development, we will want to keep track of the number of GPU
cycles spent on a given job. That means the cycle counter should be enabled
only when the GPU has work to do, and switched off whenever it is idle to
avoid wasting power.

To avoid race conditions during enablement/disabling, a reference counting
mechanism was introduced, and a job flag that tells us whether a given job
increased the refcount. This is necessary, because a future development
will let user space toggle cycle counting through a debugfs file, and a
given job might have been in flight by the time cycle counting was
disabled.

Toggling of GPU cycle counting has to be done through a module parameter.

Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_device.c |  5 +++
 drivers/gpu/drm/panfrost/panfrost_device.h |  6 +++
 drivers/gpu/drm/panfrost/panfrost_gpu.c    | 43 ++++++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_gpu.h    |  6 +++
 drivers/gpu/drm/panfrost/panfrost_job.c    | 10 +++++
 drivers/gpu/drm/panfrost/panfrost_job.h    |  1 +
 6 files changed, 71 insertions(+)

Comments

Boris Brezillon Sept. 6, 2023, 7:21 a.m. UTC | #1
On Tue,  5 Sep 2023 19:45:18 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> In a future development, we will want to keep track of the number of GPU
> cycles spent on a given job. That means the cycle counter should be enabled
> only when the GPU has work to do, and switched off whenever it is idle to
> avoid wasting power.
> 
> To avoid race conditions during enablement/disabling, a reference counting
> mechanism was introduced, and a job flag that tells us whether a given job
> increased the refcount. This is necessary, because a future development
> will let user space toggle cycle counting through a debugfs file, and a
> given job might have been in flight by the time cycle counting was
> disabled.
> 
> Toggling of GPU cycle counting has to be done through a module parameter.
> 
> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_device.c |  5 +++
>  drivers/gpu/drm/panfrost/panfrost_device.h |  6 +++
>  drivers/gpu/drm/panfrost/panfrost_gpu.c    | 43 ++++++++++++++++++++++
>  drivers/gpu/drm/panfrost/panfrost_gpu.h    |  6 +++
>  drivers/gpu/drm/panfrost/panfrost_job.c    | 10 +++++
>  drivers/gpu/drm/panfrost/panfrost_job.h    |  1 +
>  6 files changed, 71 insertions(+)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> index fa1a086a862b..1ea2ac3804f0 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -18,6 +18,9 @@
>  #include "panfrost_mmu.h"
>  #include "panfrost_perfcnt.h"
>  
> +static bool profile;
> +module_param(profile, bool, 0600);

Not sure if we should make that a module parameter. Might be better
exposed as a debugfs knob attached to the device (even if having
multiple Mali devices is rather unlikely, I think I'd prefer to make
this toggle per-device).
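
(For illustration, a minimal per-device knob along those lines could look
roughly like the sketch below; the function name, placement and wiring into
the driver's debugfs init path are hypothetical, not part of this series:)

	static void panfrost_debugfs_init(struct drm_minor *minor)
	{
		struct panfrost_device *pfdev = minor->dev->dev_private;

		/* expose pfdev->profile_mode as a 0600 per-device knob */
		debugfs_create_atomic_t("profile", 0600, minor->debugfs_root,
					&pfdev->profile_mode);
	}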

> +
>  static int panfrost_reset_init(struct panfrost_device *pfdev)
>  {
>  	pfdev->rstc = devm_reset_control_array_get_optional_exclusive(pfdev->dev);
> @@ -207,6 +210,8 @@ int panfrost_device_init(struct panfrost_device *pfdev)
>  
>  	spin_lock_init(&pfdev->as_lock);
>  
> +	atomic_set(&pfdev->profile_mode, profile);

So, profile_mode can only be set at probe time, meaning any changes to
the profile module param are not taken into account after that point.

> +
>  	err = panfrost_clk_init(pfdev);
>  	if (err) {
>  		dev_err(pfdev->dev, "clk init failed %d\n", err);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index b0126b9fbadc..5c09c9f3ae08 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -107,6 +107,7 @@ struct panfrost_device {
>  	struct list_head scheduled_jobs;
>  
>  	struct panfrost_perfcnt *perfcnt;
> +	atomic_t profile_mode;
>  
>  	struct mutex sched_lock;
>  
> @@ -121,6 +122,11 @@ struct panfrost_device {
>  	struct shrinker shrinker;
>  
>  	struct panfrost_devfreq pfdevfreq;
> +
> +	struct {
> +		atomic_t use_count;
> +		spinlock_t lock;
> +	} cycle_counter;
>  };
>  
>  struct panfrost_mmu {
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> index 2faa344d89ee..fddbc72bf093 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
> @@ -73,6 +73,8 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
>  	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
>  	gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);
>  
> +	atomic_set(&pfdev->cycle_counter.use_count, 0);

I think I'd prefer if the jobs that were in-flight at the time a GPU
hang occurred explicitly release their reference on use_count. So maybe
something like

	/* When we reset the GPU we should have no cycle-counter users
	 * left.
	 */
	if (drm_WARN_ON(pfdev->ddev,
			atomic_read(&pfdev->cycle_counter.use_count) != 0))
		atomic_set(&pfdev->cycle_counter.use_count, 0);

to catch unbalanced get/put situations.

> +
>  	return 0;
>  }
>  
> @@ -321,6 +323,46 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
>  		 pfdev->features.shader_present, pfdev->features.l2_present);
>  }
>  
> +void panfrost_cycle_counter_get(struct panfrost_device *pfdev)
> +{
> +	if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count))
> +		return;
> +
> +	spin_lock(&pfdev->cycle_counter.lock);
> +	if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1)
> +		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START);
> +	spin_unlock(&pfdev->cycle_counter.lock);
> +}
> +
> +void panfrost_cycle_counter_put(struct panfrost_device *pfdev)
> +{
> +	if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1))
> +		return;
> +
> +	spin_lock(&pfdev->cycle_counter.lock);
> +	if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0)
> +		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
> +	spin_unlock(&pfdev->cycle_counter.lock);
> +}
> +
> +void panfrost_cycle_counter_stop(struct panfrost_device *pfdev)
> +{
> +	atomic_set(&pfdev->profile_mode, 0);
> +	gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);

Why do we need to issue a STOP here? Setting profile_mode to false
should be enough to prevent future jobs from enabling the
cycle-counter, and the counter will be naturally disabled when all
in-flight jobs that had profiling enabled are done.

Actually I'm not even sure I understand why this function exists.

> +}
> +
> +unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
> +{
> +	u32 hi, lo;
> +
> +	do {
> +		hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI);
> +		lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO);
> +	} while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI));
> +
> +	return ((u64)hi << 32) | lo;
> +}
> +
>  void panfrost_gpu_power_on(struct panfrost_device *pfdev)
>  {
>  	int ret;
> @@ -367,6 +409,7 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
>  
>  void panfrost_gpu_power_off(struct panfrost_device *pfdev)
>  {
> +	panfrost_cycle_counter_stop(pfdev);

So, you're setting profile_mode = 0 in the suspend path, but AFAICT,
it's not set back to the module param profile value on resume, which
means it's disabled on suspend and never re-enabled after that.

Besides, I don't really see a reason to change the pfdev->profile_mode
value in this path. If we suspend the device, that means we have no
jobs running, and the use_count refcount should have dropped to zero,
thus disabling cycle counting.
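
(As a hedged sketch of that alternative, the power-off path could simply
carry a sanity check instead of forcing the counter off — exact form
hypothetical:)

	/* In panfrost_gpu_power_off(): by this point no jobs are running,
	 * so the cycle-counter refcount should already have dropped to 0.
	 */
	drm_WARN_ON(pfdev->ddev,
		    atomic_read(&pfdev->cycle_counter.use_count) != 0);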

>  	gpu_write(pfdev, TILER_PWROFF_LO, 0);
>  	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
>  	gpu_write(pfdev, L2_PWROFF_LO, 0);
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
> index 468c51e7e46d..4d62e8901c79 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
> @@ -16,6 +16,12 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
>  void panfrost_gpu_power_on(struct panfrost_device *pfdev);
>  void panfrost_gpu_power_off(struct panfrost_device *pfdev);
>  
> +void panfrost_stop_cycle_counter(struct panfrost_device *pfdev);
> +void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
> +void panfrost_cycle_counter_stop(struct panfrost_device *pfdev);
> +void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
> +unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev);
> +
>  void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
>  
>  #endif
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 033f5e684707..8b1bf6ac48f8 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -297,6 +297,11 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>  
> +	if (atomic_read(&pfdev->profile_mode)) {
> +		panfrost_cycle_counter_get(pfdev);
> +		job->is_profiled = true;
> +	}
> +
>  	drm_sched_entity_push_job(&job->base);
>  
>  	mutex_unlock(&pfdev->sched_lock);
> @@ -351,6 +356,9 @@ static void panfrost_job_free(struct drm_sched_job *sched_job)
>  
>  	drm_sched_job_cleanup(sched_job);
>  
> +	if (job->is_profiled)
> +		panfrost_cycle_counter_put(job->pfdev);
> +
>  	panfrost_job_put(job);
>  }
>  
> @@ -842,6 +850,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>  		}
>  	}
>  
> +	spin_lock_init(&pfdev->cycle_counter.lock);
> +
>  	panfrost_job_enable_interrupts(pfdev);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 8becc1ba0eb9..2aa0add35459 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -32,6 +32,7 @@ struct panfrost_job {
>  
>  	/* Fence to be signaled by drm-sched once its done with the job */
>  	struct dma_fence *render_done_fence;
> +	bool is_profiled;
>  };
>  
>  int panfrost_job_init(struct panfrost_device *pfdev);
Boris Brezillon Sept. 6, 2023, 7:57 a.m. UTC | #2
On Tue,  5 Sep 2023 19:45:18 +0100
Adrián Larumbe <adrian.larumbe@collabora.com> wrote:

> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 033f5e684707..8b1bf6ac48f8 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -297,6 +297,11 @@ int panfrost_job_push(struct panfrost_job *job)
>  
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>  
> +	if (atomic_read(&pfdev->profile_mode)) {
> +		panfrost_cycle_counter_get(pfdev);

This one should go in panfrost_job_hw_submit() IMO, otherwise you're
enabling the cycle-counter before the job has its dependencies met, and
depending on what the job depends on, it might take some time.

> +		job->is_profiled = true;
> +	}
> +
>  	drm_sched_entity_push_job(&job->base);
>  
>  	mutex_unlock(&pfdev->sched_lock);
> @@ -351,6 +356,9 @@ static void panfrost_job_free(struct drm_sched_job *sched_job)
>  
>  	drm_sched_job_cleanup(sched_job);
>  
> +	if (job->is_profiled)
> +		panfrost_cycle_counter_put(job->pfdev);

I think I'd move this panfrost_cycle_counter_put() to
panfrost_job_handle_{err,done}(), to release the counter as soon as
we're done executing the job. We also need to make sure we release
cycle counter refs in the reset path (here [1]), to keep get/put
balanced when jobs are resubmitted.

> +
>  	panfrost_job_put(job);
>  }

[1]https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/panfrost/panfrost_job.c#L666
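
(For illustration, the two moves suggested above could look roughly like the
fragments below — placement is a sketch based on the functions named in this
thread, not the final patch:)

	/* In panfrost_job_hw_submit(), right before the job is written to
	 * the hardware:
	 */
	if (atomic_read(&pfdev->profile_mode)) {
		panfrost_cycle_counter_get(pfdev);
		job->is_profiled = true;
	}

	/* In panfrost_job_handle_done(), once the job has completed: */
	if (job->is_profiled)
		panfrost_cycle_counter_put(job->pfdev);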
Adrián Larumbe Sept. 9, 2023, 3:28 p.m. UTC | #3
On 06.09.2023 09:57, Boris Brezillon wrote:
>On Tue,  5 Sep 2023 19:45:18 +0100
>Adrián Larumbe <adrian.larumbe@collabora.com> wrote:
>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>> index 033f5e684707..8b1bf6ac48f8 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>> @@ -297,6 +297,11 @@ int panfrost_job_push(struct panfrost_job *job)
>>  
>>  	kref_get(&job->refcount); /* put by scheduler job completion */
>>  
>> +	if (atomic_read(&pfdev->profile_mode)) {
>> +		panfrost_cycle_counter_get(pfdev);
>
>This one should go in panfrost_job_hw_submit() IMO, otherwise you're
>enabling the cycle-counter before the job has its dependencies met, and
>depending on what the job depends on, it might take some time.

I think at first I decided against this because of a previous thread about
enabling the cycle counters:
https://www.mail-archive.com/dri-devel@lists.freedesktop.org/msg352508.html

But it turns out the fix suggested by Steven Price was what I was doing already
in the previous patch revision, namely tagging the job with whether it
had taken the cycle counter refcnt, so I guess it should be fine.

>> +		job->is_profiled = true;
>> +	}
>> +
>>  	drm_sched_entity_push_job(&job->base);
>>  
>>  	mutex_unlock(&pfdev->sched_lock);
>> @@ -351,6 +356,9 @@ static void panfrost_job_free(struct drm_sched_job *sched_job)
>>  
>>  	drm_sched_job_cleanup(sched_job);
>>  
>> +	if (job->is_profiled)
>> +		panfrost_cycle_counter_put(job->pfdev);
>
>I think I'd move this panfrost_cycle_counter_put() to
>panfrost_job_handle_{err,done}(), to release the counter as soon as
>we're done executing the job. We also need to make sure we release
>cycle counter refs in the reset path (here [1]), to keep get/put
>balanced when jobs are resubmitted.
>
>> +
>>  	panfrost_job_put(job);
>>  }
>
>[1]https://elixir.bootlin.com/linux/latest/source/drivers/gpu/drm/panfrost/panfrost_job.c#L666
Adrián Larumbe Sept. 9, 2023, 3:55 p.m. UTC | #4
On 06.09.2023 09:21, Boris Brezillon wrote:
>On Tue,  5 Sep 2023 19:45:18 +0100
>Adrián Larumbe <adrian.larumbe@collabora.com> wrote:
>
>> In a future development, we will want to keep track of the number of GPU
>> cycles spent on a given job. That means the cycle counter should be enabled
>> only when the GPU has work to do, and switched off whenever it is idle to
>> avoid wasting power.
>> 
>> To avoid race conditions during enablement/disabling, a reference counting
>> mechanism was introduced, and a job flag that tells us whether a given job
>> increased the refcount. This is necessary, because a future development
>> will let user space toggle cycle counting through a debugfs file, and a
>> given job might have been in flight by the time cycle counting was
>> disabled.
>> 
>> Toggling of GPU cycle counting has to be done through a module parameter.
>> 
>> Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
>> ---
>>  drivers/gpu/drm/panfrost/panfrost_device.c |  5 +++
>>  drivers/gpu/drm/panfrost/panfrost_device.h |  6 +++
>>  drivers/gpu/drm/panfrost/panfrost_gpu.c    | 43 ++++++++++++++++++++++
>>  drivers/gpu/drm/panfrost/panfrost_gpu.h    |  6 +++
>>  drivers/gpu/drm/panfrost/panfrost_job.c    | 10 +++++
>>  drivers/gpu/drm/panfrost/panfrost_job.h    |  1 +
>>  6 files changed, 71 insertions(+)
>> 
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
>> index fa1a086a862b..1ea2ac3804f0 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
>> @@ -18,6 +18,9 @@
>>  #include "panfrost_mmu.h"
>>  #include "panfrost_perfcnt.h"
>>  
>> +static bool profile;
>> +module_param(profile, bool, 0600);
>
>Not sure if we should make that a module parameter. Might be better
>exposed as a debugfs knob attached to the device (even if having
>multiple Mali devices is rather unlikely, I think I'd prefer to make
>this toggle per-device).
>
>> +
>>  static int panfrost_reset_init(struct panfrost_device *pfdev)
>>  {
>>  	pfdev->rstc = devm_reset_control_array_get_optional_exclusive(pfdev->dev);
>> @@ -207,6 +210,8 @@ int panfrost_device_init(struct panfrost_device *pfdev)
>>  
>>  	spin_lock_init(&pfdev->as_lock);
>>  
>> +	atomic_set(&pfdev->profile_mode, profile);
>
>So, profile_mode can only be set at probe time, meaning any changes to
>the profile module param are not taken into account after that point.

Not in this patch of the series; that's why I thought of enabling debugfs
toggling in a later patch. But I suppose it's best to coalesce them into a
single one and do away with the module param altogether.

>> +
>>  	err = panfrost_clk_init(pfdev);
>>  	if (err) {
>>  		dev_err(pfdev->dev, "clk init failed %d\n", err);
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>> index b0126b9fbadc..5c09c9f3ae08 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>> @@ -107,6 +107,7 @@ struct panfrost_device {
>>  	struct list_head scheduled_jobs;
>>  
>>  	struct panfrost_perfcnt *perfcnt;
>> +	atomic_t profile_mode;
>>  
>>  	struct mutex sched_lock;
>>  
>> @@ -121,6 +122,11 @@ struct panfrost_device {
>>  	struct shrinker shrinker;
>>  
>>  	struct panfrost_devfreq pfdevfreq;
>> +
>> +	struct {
>> +		atomic_t use_count;
>> +		spinlock_t lock;
>> +	} cycle_counter;
>>  };
>>  
>>  struct panfrost_mmu {
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
>> index 2faa344d89ee..fddbc72bf093 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
>> @@ -73,6 +73,8 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
>>  	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
>>  	gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);
>>  
>> +	atomic_set(&pfdev->cycle_counter.use_count, 0);
>
>I think I'd prefer if the jobs that were in-flight at the time a GPU
>hang occurred explicitly release their reference on use_count. So maybe
>something like
>
>	/* When we reset the GPU we should have no cycle-counter users
>	 * left.
>	 */
>	if (drm_WARN_ON(pfdev->ddev,
>			atomic_read(&pfdev->cycle_counter.use_count) != 0))
>		atomic_set(&pfdev->cycle_counter.use_count, 0);
>
>to catch unbalanced get/put situations.
>
>> +
>>  	return 0;
>>  }
>>  
>> @@ -321,6 +323,46 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
>>  		 pfdev->features.shader_present, pfdev->features.l2_present);
>>  }
>>  
>> +void panfrost_cycle_counter_get(struct panfrost_device *pfdev)
>> +{
>> +	if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count))
>> +		return;
>> +
>> +	spin_lock(&pfdev->cycle_counter.lock);
>> +	if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1)
>> +		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START);
>> +	spin_unlock(&pfdev->cycle_counter.lock);
>> +}
>> +
>> +void panfrost_cycle_counter_put(struct panfrost_device *pfdev)
>> +{
>> +	if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1))
>> +		return;
>> +
>> +	spin_lock(&pfdev->cycle_counter.lock);
>> +	if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0)
>> +		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
>> +	spin_unlock(&pfdev->cycle_counter.lock);
>> +}
>> +
>> +void panfrost_cycle_counter_stop(struct panfrost_device *pfdev)
>> +{
>> +	atomic_set(&pfdev->profile_mode, 0);
>> +	gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
>
>Why do we need to issue a STOP here? Setting profile_mode to false
>should be enough to prevent future jobs from enabling the
>cycle-counter, and the counter will be naturally disabled when all
>in-flight jobs that had profiling enabled are done.
>
>Actually I'm not even sure I understand why this function exists.

I thought it might be good to stop the cycle counter at once even if there
were still in-flight jobs, but now that you mention this perhaps it would
run the risk of disabling it even in the case of a broken refcount.

>> +}
>> +
>> +unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
>> +{
>> +	u32 hi, lo;
>> +
>> +	do {
>> +		hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI);
>> +		lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO);
>> +	} while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI));
>> +
>> +	return ((u64)hi << 32) | lo;
>> +}
>> +
>>  void panfrost_gpu_power_on(struct panfrost_device *pfdev)
>>  {
>>  	int ret;
>> @@ -367,6 +409,7 @@ void panfrost_gpu_power_on(struct panfrost_device *pfdev)
>>  
>>  void panfrost_gpu_power_off(struct panfrost_device *pfdev)
>>  {
>> +	panfrost_cycle_counter_stop(pfdev);
>
>So, you're setting profile_mode = 0 in the suspend path, but AFAICT,
>it's not set back to the module param profile value on resume, which
>means it's disabled on suspend and never re-enabled after that.

Yep, this is wrong, I missed this path altogether.

>Besides, I don't really see a reason to change the pfdev->profile_mode
>value in this path. If we suspend the device, that means we have no
>jobs running, and the use_count refcount should have dropped to zero,
>thus disabling cycle counting.
>
>>  	gpu_write(pfdev, TILER_PWROFF_LO, 0);
>>  	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
>>  	gpu_write(pfdev, L2_PWROFF_LO, 0);
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
>> index 468c51e7e46d..4d62e8901c79 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_gpu.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
>> @@ -16,6 +16,12 @@ int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
>>  void panfrost_gpu_power_on(struct panfrost_device *pfdev);
>>  void panfrost_gpu_power_off(struct panfrost_device *pfdev);
>>  
>> +void panfrost_stop_cycle_counter(struct panfrost_device *pfdev);
>> +void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
>> +void panfrost_cycle_counter_stop(struct panfrost_device *pfdev);
>> +void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
>> +unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev);
>> +
>>  void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
>>  
>>  #endif
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>> index 033f5e684707..8b1bf6ac48f8 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>> @@ -297,6 +297,11 @@ int panfrost_job_push(struct panfrost_job *job)
>>  
>>  	kref_get(&job->refcount); /* put by scheduler job completion */
>>  
>> +	if (atomic_read(&pfdev->profile_mode)) {
>> +		panfrost_cycle_counter_get(pfdev);
>> +		job->is_profiled = true;
>> +	}
>> +
>>  	drm_sched_entity_push_job(&job->base);
>>  
>>  	mutex_unlock(&pfdev->sched_lock);
>> @@ -351,6 +356,9 @@ static void panfrost_job_free(struct drm_sched_job *sched_job)
>>  
>>  	drm_sched_job_cleanup(sched_job);
>>  
>> +	if (job->is_profiled)
>> +		panfrost_cycle_counter_put(job->pfdev);
>> +
>>  	panfrost_job_put(job);
>>  }
>>  
>> @@ -842,6 +850,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>>  		}
>>  	}
>>  
>> +	spin_lock_init(&pfdev->cycle_counter.lock);
>> +
>>  	panfrost_job_enable_interrupts(pfdev);
>>  
>>  	return 0;
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
>> index 8becc1ba0eb9..2aa0add35459 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
>> @@ -32,6 +32,7 @@ struct panfrost_job {
>>  
>>  	/* Fence to be signaled by drm-sched once its done with the job */
>>  	struct dma_fence *render_done_fence;
>> +	bool is_profiled;
>>  };
>>  
>>  int panfrost_job_init(struct panfrost_device *pfdev);

Patch

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index fa1a086a862b..1ea2ac3804f0 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -18,6 +18,9 @@ 
 #include "panfrost_mmu.h"
 #include "panfrost_perfcnt.h"
 
+static bool profile;
+module_param(profile, bool, 0600);
+
 static int panfrost_reset_init(struct panfrost_device *pfdev)
 {
 	pfdev->rstc = devm_reset_control_array_get_optional_exclusive(pfdev->dev);
@@ -207,6 +210,8 @@  int panfrost_device_init(struct panfrost_device *pfdev)
 
 	spin_lock_init(&pfdev->as_lock);
 
+	atomic_set(&pfdev->profile_mode, profile);
+
 	err = panfrost_clk_init(pfdev);
 	if (err) {
 		dev_err(pfdev->dev, "clk init failed %d\n", err);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index b0126b9fbadc..5c09c9f3ae08 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -107,6 +107,7 @@  struct panfrost_device {
 	struct list_head scheduled_jobs;
 
 	struct panfrost_perfcnt *perfcnt;
+	atomic_t profile_mode;
 
 	struct mutex sched_lock;
 
@@ -121,6 +122,11 @@  struct panfrost_device {
 	struct shrinker shrinker;
 
 	struct panfrost_devfreq pfdevfreq;
+
+	struct {
+		atomic_t use_count;
+		spinlock_t lock;
+	} cycle_counter;
 };
 
 struct panfrost_mmu {
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 2faa344d89ee..fddbc72bf093 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -73,6 +73,8 @@  int panfrost_gpu_soft_reset(struct panfrost_device *pfdev)
 	gpu_write(pfdev, GPU_INT_CLEAR, GPU_IRQ_MASK_ALL);
 	gpu_write(pfdev, GPU_INT_MASK, GPU_IRQ_MASK_ALL);
 
+	atomic_set(&pfdev->cycle_counter.use_count, 0);
+
 	return 0;
 }
 
@@ -321,6 +323,46 @@  static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
 		 pfdev->features.shader_present, pfdev->features.l2_present);
 }
 
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev)
+{
+	if (atomic_inc_not_zero(&pfdev->cycle_counter.use_count))
+		return;
+
+	spin_lock(&pfdev->cycle_counter.lock);
+	if (atomic_inc_return(&pfdev->cycle_counter.use_count) == 1)
+		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_START);
+	spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev)
+{
+	if (atomic_add_unless(&pfdev->cycle_counter.use_count, -1, 1))
+		return;
+
+	spin_lock(&pfdev->cycle_counter.lock);
+	if (atomic_dec_return(&pfdev->cycle_counter.use_count) == 0)
+		gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
+	spin_unlock(&pfdev->cycle_counter.lock);
+}
+
+void panfrost_cycle_counter_stop(struct panfrost_device *pfdev)
+{
+	atomic_set(&pfdev->profile_mode, 0);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_CYCLE_COUNT_STOP);
+}
+
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev)
+{
+	u32 hi, lo;
+
+	do {
+		hi = gpu_read(pfdev, GPU_CYCLE_COUNT_HI);
+		lo = gpu_read(pfdev, GPU_CYCLE_COUNT_LO);
+	} while (hi != gpu_read(pfdev, GPU_CYCLE_COUNT_HI));
+
+	return ((u64)hi << 32) | lo;
+}
+
 void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 {
 	int ret;
@@ -367,6 +409,7 @@  void panfrost_gpu_power_on(struct panfrost_device *pfdev)
 
 void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 {
+	panfrost_cycle_counter_stop(pfdev);
 	gpu_write(pfdev, TILER_PWROFF_LO, 0);
 	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
 	gpu_write(pfdev, L2_PWROFF_LO, 0);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.h b/drivers/gpu/drm/panfrost/panfrost_gpu.h
index 468c51e7e46d..4d62e8901c79 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.h
@@ -16,6 +16,12 @@  int panfrost_gpu_soft_reset(struct panfrost_device *pfdev);
 void panfrost_gpu_power_on(struct panfrost_device *pfdev);
 void panfrost_gpu_power_off(struct panfrost_device *pfdev);
 
+void panfrost_stop_cycle_counter(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_get(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_stop(struct panfrost_device *pfdev);
+void panfrost_cycle_counter_put(struct panfrost_device *pfdev);
+unsigned long long panfrost_cycle_counter_read(struct panfrost_device *pfdev);
+
 void panfrost_gpu_amlogic_quirk(struct panfrost_device *pfdev);
 
 #endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 033f5e684707..8b1bf6ac48f8 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -297,6 +297,11 @@  int panfrost_job_push(struct panfrost_job *job)
 
 	kref_get(&job->refcount); /* put by scheduler job completion */
 
+	if (atomic_read(&pfdev->profile_mode)) {
+		panfrost_cycle_counter_get(pfdev);
+		job->is_profiled = true;
+	}
+
 	drm_sched_entity_push_job(&job->base);
 
 	mutex_unlock(&pfdev->sched_lock);
@@ -351,6 +356,9 @@  static void panfrost_job_free(struct drm_sched_job *sched_job)
 
 	drm_sched_job_cleanup(sched_job);
 
+	if (job->is_profiled)
+		panfrost_cycle_counter_put(job->pfdev);
+
 	panfrost_job_put(job);
 }
 
@@ -842,6 +850,8 @@  int panfrost_job_init(struct panfrost_device *pfdev)
 		}
 	}
 
+	spin_lock_init(&pfdev->cycle_counter.lock);
+
 	panfrost_job_enable_interrupts(pfdev);
 
 	return 0;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 8becc1ba0eb9..2aa0add35459 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -32,6 +32,7 @@  struct panfrost_job {
 
 	/* Fence to be signaled by drm-sched once its done with the job */
 	struct dma_fence *render_done_fence;
+	bool is_profiled;
 };
 
 int panfrost_job_init(struct panfrost_device *pfdev);