diff mbox series

[3/3] drm/panfrost: Handle JD_REQ_CYCLE_COUNT

Message ID 20240807160900.149154-4-mary.guillemard@collabora.com (mailing list archive)
State New, archived
Headers show
Series drm/panfrost: Wire cycle counters and timestamp info to userspace | expand

Commit Message

Mary Guillemard Aug. 7, 2024, 4:08 p.m. UTC
If a job requires cycle counters or system timestamps propagation, we
must enable cycle counting before issuing a job and disable it right
after the job completes.

Since this extends the uAPI and because userland needs a way to advertise
features like VK_KHR_shader_clock conditionally, we bumps the driver
minor version.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c |  8 ++++++--
 drivers/gpu/drm/panfrost/panfrost_job.c | 10 ++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

Comments

Steven Price Aug. 8, 2024, 10:23 a.m. UTC | #1
On 07/08/2024 17:08, Mary Guillemard wrote:
> If a job requires cycle counters or system timestamps propagation, we
> must enable cycle counting before issuing a job and disable it right
> after the job completes.
> 
> Since this extends the uAPI and because userland needs a way to advertise
> features like VK_KHR_shader_clock conditionally, we bumps the driver
> minor version.
> 
> Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  8 ++++++--
>  drivers/gpu/drm/panfrost/panfrost_job.c | 10 ++++++++++
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index d94c9bf5a7f9..fe983defdfdf 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -21,6 +21,8 @@
>  #include "panfrost_gpu.h"
>  #include "panfrost_perfcnt.h"
>  
> +#define JOB_REQUIREMENTS (PANFROST_JD_REQ_FS | PANFROST_JD_REQ_CYCLE_COUNT)
> +
>  static bool unstable_ioctls;
>  module_param_unsafe(unstable_ioctls, bool, 0600);
>  
> @@ -262,7 +264,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
>  	if (!args->jc)
>  		return -EINVAL;
>  
> -	if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
> +	if (args->requirements && args->requirements & ~JOB_REQUIREMENTS)

I think this can be simplified to just:
if (args->requirements & ~JOB_REQUIREMENTS)

>  		return -EINVAL;
>  
>  	if (args->out_sync > 0) {
> @@ -601,6 +603,8 @@ static const struct file_operations panfrost_drm_driver_fops = {
>   * - 1.0 - initial interface
>   * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO
>   * - 1.2 - adds AFBC_FEATURES query
> + * - 1.3 - adds JD_REQ_CYCLE_COUNT job requirement for SUBMIT
> + *       - adds SYSTEM_TIMESTAMP and SYSTEM_TIMESTAMP_FREQUENCY queries

Obviously needs updating if the first patch is dropped.

>   */
>  static const struct drm_driver panfrost_drm_driver = {
>  	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
> @@ -614,7 +618,7 @@ static const struct drm_driver panfrost_drm_driver = {
>  	.desc			= "panfrost DRM",
>  	.date			= "20180908",
>  	.major			= 1,
> -	.minor			= 2,
> +	.minor			= 3,
>  
>  	.gem_create_object	= panfrost_gem_create_object,
>  	.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index df49d37d0e7e..d8c215c0c672 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -159,6 +159,9 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
>  	struct panfrost_job *job = pfdev->jobs[slot][0];
>  
>  	WARN_ON(!job);
> +	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
> +		panfrost_cycle_counter_put(pfdev);
> +
>  	if (job->is_profiled) {
>  		if (job->engine_usage) {
>  			job->engine_usage->elapsed_ns[slot] +=
> @@ -219,6 +222,9 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  
>  	panfrost_job_write_affinity(pfdev, job->requirements, js);
>  
> +	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
> +		panfrost_cycle_counter_get(pfdev);
> +
>  	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
>  	 * start */
>  	cfg |= JS_CONFIG_THREAD_PRI(8) |
> @@ -693,8 +699,12 @@ panfrost_reset(struct panfrost_device *pfdev,
>  	spin_lock(&pfdev->js->job_lock);
>  	for (i = 0; i < NUM_JOB_SLOTS; i++) {
>  		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
> +			if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
> +				panfrost_cycle_counter_put(pfdev);
> +
>  			if (pfdev->jobs[i][j]->is_profiled)
>  				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
> +
>  			pm_runtime_put_noidle(pfdev->dev);
>  			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
>  		}

This looks correct, but it would be nice to combine the is_profiled and
REQ_CYCLE_COUNT paths. We end up with the odd situation of taking two
reference counts (per job) if global profiling is enabled and the flag
is specified.

To be honest it looks as if there could be a bit of a cleanup by
changing panfrost_reset() to use panfrost_dequeue_job(). I'm not sure
why it was written that way.

Steve
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d94c9bf5a7f9..fe983defdfdf 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -21,6 +21,8 @@ 
 #include "panfrost_gpu.h"
 #include "panfrost_perfcnt.h"
 
+#define JOB_REQUIREMENTS (PANFROST_JD_REQ_FS | PANFROST_JD_REQ_CYCLE_COUNT)
+
 static bool unstable_ioctls;
 module_param_unsafe(unstable_ioctls, bool, 0600);
 
@@ -262,7 +264,7 @@  static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
 	if (!args->jc)
 		return -EINVAL;
 
-	if (args->requirements && args->requirements != PANFROST_JD_REQ_FS)
+	if (args->requirements && args->requirements & ~JOB_REQUIREMENTS)
 		return -EINVAL;
 
 	if (args->out_sync > 0) {
@@ -601,6 +603,8 @@  static const struct file_operations panfrost_drm_driver_fops = {
  * - 1.0 - initial interface
  * - 1.1 - adds HEAP and NOEXEC flags for CREATE_BO
  * - 1.2 - adds AFBC_FEATURES query
+ * - 1.3 - adds JD_REQ_CYCLE_COUNT job requirement for SUBMIT
+ *       - adds SYSTEM_TIMESTAMP and SYSTEM_TIMESTAMP_FREQUENCY queries
  */
 static const struct drm_driver panfrost_drm_driver = {
 	.driver_features	= DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ,
@@ -614,7 +618,7 @@  static const struct drm_driver panfrost_drm_driver = {
 	.desc			= "panfrost DRM",
 	.date			= "20180908",
 	.major			= 1,
-	.minor			= 2,
+	.minor			= 3,
 
 	.gem_create_object	= panfrost_gem_create_object,
 	.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index df49d37d0e7e..d8c215c0c672 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -159,6 +159,9 @@  panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
 	struct panfrost_job *job = pfdev->jobs[slot][0];
 
 	WARN_ON(!job);
+	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
+		panfrost_cycle_counter_put(pfdev);
+
 	if (job->is_profiled) {
 		if (job->engine_usage) {
 			job->engine_usage->elapsed_ns[slot] +=
@@ -219,6 +222,9 @@  static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
 
 	panfrost_job_write_affinity(pfdev, job->requirements, js);
 
+	if (job->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
+		panfrost_cycle_counter_get(pfdev);
+
 	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
 	 * start */
 	cfg |= JS_CONFIG_THREAD_PRI(8) |
@@ -693,8 +699,12 @@  panfrost_reset(struct panfrost_device *pfdev,
 	spin_lock(&pfdev->js->job_lock);
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
 		for (j = 0; j < ARRAY_SIZE(pfdev->jobs[0]) && pfdev->jobs[i][j]; j++) {
+			if (pfdev->jobs[i][j]->requirements & PANFROST_JD_REQ_CYCLE_COUNT)
+				panfrost_cycle_counter_put(pfdev);
+
 			if (pfdev->jobs[i][j]->is_profiled)
 				panfrost_cycle_counter_put(pfdev->jobs[i][j]->pfdev);
+
 			pm_runtime_put_noidle(pfdev->dev);
 			panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
 		}