diff mbox series

drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL

Message ID 20241020204156.113853-1-christian.gmeiner@gmail.com (mailing list archive)
State New
Headers show
Series drm/v3d: Add DRM_IOCTL_V3D_PERFMON_SET_GLOBAL | expand

Commit Message

Christian Gmeiner Oct. 20, 2024, 8:41 p.m. UTC
From: Christian Gmeiner <cgmeiner@igalia.com>

This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which
allows the configuration of a global performance monitor (perfmon).
The global perfmon is used for all jobs, ensuring consistent performance
tracking across submissions.

Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_drv.c                 |  3 ++
 drivers/gpu/drm/v3d/v3d_drv.h                 | 10 ++++
 drivers/gpu/drm/v3d/v3d_perfmon.c             | 49 +++++++++++++++++++
 .../gpu/drm/v3d/v3d_performance_counters.h    |  6 +++
 drivers/gpu/drm/v3d/v3d_sched.c               | 10 +++-
 include/uapi/drm/v3d_drm.h                    | 15 ++++++
 6 files changed, 91 insertions(+), 2 deletions(-)

Comments

André Almeida Oct. 21, 2024, 8:42 p.m. UTC | #1
Hi Christian!

Em 20/10/2024 17:41, Christian Gmeiner escreveu:
> From: Christian Gmeiner <cgmeiner@igalia.com>
> 
> This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which
> allows the configuration of a global performance monitor (perfmon).
> The global perfmon is used for all jobs, ensuring consistent performance
> tracking across submissions.

Usually we write in the imperative form:

Add a new ioctl, ...

> 
> Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
> ---
>   drivers/gpu/drm/v3d/v3d_drv.c                 |  3 ++
>   drivers/gpu/drm/v3d/v3d_drv.h                 | 10 ++++
>   drivers/gpu/drm/v3d/v3d_perfmon.c             | 49 +++++++++++++++++++
>   .../gpu/drm/v3d/v3d_performance_counters.h    |  6 +++
>   drivers/gpu/drm/v3d/v3d_sched.c               | 10 +++-
>   include/uapi/drm/v3d_drm.h                    | 15 ++++++
>   6 files changed, 91 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
> index d7ff1f5fa481..f1753ee2af25 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.c
> +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> @@ -214,6 +214,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
>   	DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW),
>   };
>   
>   static const struct drm_driver v3d_drm_driver = {
> @@ -350,6 +351,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
>   	if (ret)
>   		goto drm_unregister;
>   
> +	atomic_set(&v3d->num_perfmon, 0);
> +
>   	return 0;
>   
>   drm_unregister:
> diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
> index cf4b23369dc4..9491d730d99f 100644
> --- a/drivers/gpu/drm/v3d/v3d_drv.h
> +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> @@ -61,6 +61,8 @@ struct v3d_queue_state {
>   	struct v3d_stats stats;
>   };
>   
> +struct v3d_dev;
> +

Forward declarations go in the beginning of the file, along with the 
other ones:

struct clk;
struct platform_device;
struct reset_control;
+struct v3d_dev;

>   /* Performance monitor object. The perform lifetime is controlled by userspace
>    * using perfmon related ioctls. A perfmon can be attached to a submit_cl
>    * request, and when this is the case, HW perf counters will be activated just
> @@ -68,6 +70,9 @@ struct v3d_queue_state {
>    * done. This way, only events related to a specific job will be counted.
>    */
>   struct v3d_perfmon {
> +	/* Pointer back to v3d instance this perfmon belongs. */
> +	struct v3d_dev *v3d;
> +
>   	/* Tracks the number of users of the perfmon, when this counter reaches
>   	 * zero the perfmon is destroyed.
>   	 */
> @@ -179,6 +184,9 @@ struct v3d_dev {
>   		u32 num_allocated;
>   		u32 pages_allocated;
>   	} bo_stats;
> +
> +	/* Keep track of current number of allocated perfmons. */
> +	atomic_t num_perfmon;
>   };
>   
>   static inline struct v3d_dev *
> @@ -584,6 +592,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
>   				 struct drm_file *file_priv);
>   int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
>   				  struct drm_file *file_priv);
> +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
> +				 struct drm_file *file_priv);
>   
>   /* v3d_sysfs.c */
>   int v3d_sysfs_init(struct device *dev);

[...]

> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 87fc5bb0a61e..960d392d75a3 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -43,6 +43,7 @@ extern "C" {
>   #define DRM_V3D_PERFMON_GET_VALUES                0x0a
>   #define DRM_V3D_SUBMIT_CPU                        0x0b
>   #define DRM_V3D_PERFMON_GET_COUNTER               0x0c
> +#define DRM_V3D_PERFMON_SET_GLOBAL                0x0d
>   
>   #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
>   #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
> @@ -61,6 +62,8 @@ extern "C" {
>   #define DRM_IOCTL_V3D_SUBMIT_CPU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
>   #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \
>   						   struct drm_v3d_perfmon_get_counter)
> +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL  DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \
> +						   struct drm_v3d_perfmon_set_global)
>   
>   #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
>   #define DRM_V3D_SUBMIT_EXTENSION		  0x02
> @@ -765,6 +768,18 @@ struct drm_v3d_perfmon_get_counter {
>   	__u8 reserved[7];
>   };
>   
> +/**

Using /** means that you are writting a kernel-doc comment [1], so make 
sure to describe each struct member, otherwise it's going to generate 
build warnings with W=1.

> + * struct drm_v3d_perfmon_set_global - ioctl to define a
> + * global performance counter that is used if a job has
> + * not assigned one on its own.
> + */
> +
> +#define DRM_V3D_PERFMON_CLEAR_GLOBAL    0x0001

I would keep this define above the struct comment.

> +struct drm_v3d_perfmon_set_global {
> +	__u32 flags;
> +	__u32 id;
> +};
> +
>   #if defined(__cplusplus)
>   }
>   #endif

[1] https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html
Maíra Canal Oct. 22, 2024, 8:12 p.m. UTC | #2
On 10/20/24 17:41, Christian Gmeiner wrote:
> From: Christian Gmeiner <cgmeiner@igalia.com>
> 
> This patch adds a new ioctl, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, which
> allows the configuration of a global performance monitor (perfmon).
> The global perfmon is used for all jobs, ensuring consistent performance
> tracking across submissions.
> 
> Signed-off-by: Christian Gmeiner <cgmeiner@igalia.com>
> ---
>   drivers/gpu/drm/v3d/v3d_drv.c                 |  3 ++
>   drivers/gpu/drm/v3d/v3d_drv.h                 | 10 ++++
>   drivers/gpu/drm/v3d/v3d_perfmon.c             | 49 +++++++++++++++++++
>   .../gpu/drm/v3d/v3d_performance_counters.h    |  6 +++
>   drivers/gpu/drm/v3d/v3d_sched.c               | 10 +++-
>   include/uapi/drm/v3d_drm.h                    | 15 ++++++
>   6 files changed, 91 insertions(+), 2 deletions(-)
> 

Hi Christian,

I have one major issue with this approach: I don't think we should
introduce a `global_perfmon` in `struct v3d_perfmon_info`. `struct
v3d_perfmon_info` was created to store information about the counters,
such as total number of perfcnts supported and the description of the
counters.

I believe you should use `active_perfmon` in your implementation and
don't create `global_perfmon`. This is going to make the code less
tricky to understand and it's going to make sure that the hardware inner
working is transparent in software.

Only one perfmon can be active in a given moment of time, therefore,
let's use `active_perfmon` to represent it.

I couple more things came to my attention. First, I don't think we need
to limit the creation of other perfmons. We can create perfmons and
don't use it for a while. We only need to make sure that the user can't
attach perfmons to jobs, when the global perfmon is enabled.

For sure, if we go through this strategy, there is no need to have a
count of all the perfmons that V3D has.

I would prefer to treat the global perfmon as a state. Ideally, we would
enable and disable this state through the IOCTL.

One last thing is: don't forget to stop the perfmons when you don't use
it anymore :)

Best Regards,
- Maíra
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index d7ff1f5fa481..f1753ee2af25 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -214,6 +214,7 @@  static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW),
 };
 
 static const struct drm_driver v3d_drm_driver = {
@@ -350,6 +351,8 @@  static int v3d_platform_drm_probe(struct platform_device *pdev)
 	if (ret)
 		goto drm_unregister;
 
+	atomic_set(&v3d->num_perfmon, 0);
+
 	return 0;
 
 drm_unregister:
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index cf4b23369dc4..9491d730d99f 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -61,6 +61,8 @@  struct v3d_queue_state {
 	struct v3d_stats stats;
 };
 
+struct v3d_dev;
+
 /* Performance monitor object. The perform lifetime is controlled by userspace
  * using perfmon related ioctls. A perfmon can be attached to a submit_cl
  * request, and when this is the case, HW perf counters will be activated just
@@ -68,6 +70,9 @@  struct v3d_queue_state {
  * done. This way, only events related to a specific job will be counted.
  */
 struct v3d_perfmon {
+	/* Pointer back to v3d instance this perfmon belongs. */
+	struct v3d_dev *v3d;
+
 	/* Tracks the number of users of the perfmon, when this counter reaches
 	 * zero the perfmon is destroyed.
 	 */
@@ -179,6 +184,9 @@  struct v3d_dev {
 		u32 num_allocated;
 		u32 pages_allocated;
 	} bo_stats;
+
+	/* Keep track of current number of allocated perfmons. */
+	atomic_t num_perfmon;
 };
 
 static inline struct v3d_dev *
@@ -584,6 +592,8 @@  int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
 				 struct drm_file *file_priv);
 int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
+int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
 
 /* v3d_sysfs.c */
 int v3d_sysfs_init(struct device *dev);
diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
index 156be13ab2ef..15ccdbf2c4d6 100644
--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
+++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
@@ -221,6 +221,7 @@  void v3d_perfmon_get(struct v3d_perfmon *perfmon)
 void v3d_perfmon_put(struct v3d_perfmon *perfmon)
 {
 	if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) {
+		atomic_dec(&perfmon->v3d->num_perfmon);
 		mutex_destroy(&perfmon->lock);
 		kfree(perfmon);
 	}
@@ -312,6 +313,9 @@  static int v3d_perfmon_idr_del(int id, void *elem, void *data)
 	if (perfmon == v3d->active_perfmon)
 		v3d_perfmon_stop(v3d, perfmon, false);
 
+	/* If this perfmon is global one, set global_perfmon to NULL */
+	cmpxchg(&v3d->perfmon_info.global_perfmon, perfmon, NULL);
+
 	v3d_perfmon_put(perfmon);
 
 	return 0;
@@ -349,6 +353,12 @@  int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
 			return -EINVAL;
 	}
 
+	/* If there is a global perfmon. block the creation of perfmons. */
+	if (v3d->perfmon_info.global_perfmon) {
+		dev_info_ratelimited(dev->dev, "global perfmon is active\n");
+		return -EAGAIN;
+	}
+
 	perfmon = kzalloc(struct_size(perfmon, values, req->ncounters),
 			  GFP_KERNEL);
 	if (!perfmon)
@@ -358,6 +368,7 @@  int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
 		perfmon->counters[i] = req->counters[i];
 
 	perfmon->ncounters = req->ncounters;
+	perfmon->v3d = v3d;
 
 	refcount_set(&perfmon->refcnt, 1);
 	mutex_init(&perfmon->lock);
@@ -373,6 +384,7 @@  int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
 		return ret;
 	}
 
+	atomic_inc(&perfmon->v3d->num_perfmon);
 	req->id = ret;
 
 	return 0;
@@ -451,3 +463,40 @@  int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
 
 	return 0;
 }
+
+int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_perfmon_set_global *req = data;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_perfmon *perfmon;
+
+	if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL)
+		return -EINVAL;
+
+	/* If the request is to clear the global performance monitor. */
+	if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) {
+		if (cmpxchg(&v3d->perfmon_info.global_perfmon, v3d->perfmon_info.global_perfmon, NULL))
+			return 0;
+		else
+			return -EINVAL;
+	}
+
+	guard(mutex)(&v3d_priv->perfmon.lock);
+
+	perfmon = idr_find(&v3d_priv->perfmon.idr, req->id);
+	if (!perfmon)
+		return -EINVAL;
+
+	/* Only permit this ioctl(..) if there is exactly one perfmon - the one
+	 * we want to make global.
+	 */
+	if (atomic_read(&v3d->num_perfmon) != 1)
+		return -EPERM;
+
+	if (cmpxchg(&v3d->perfmon_info.global_perfmon, NULL, perfmon))
+		return -EBUSY;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/v3d/v3d_performance_counters.h b/drivers/gpu/drm/v3d/v3d_performance_counters.h
index d919a2fc9449..3d1c64eeb2da 100644
--- a/drivers/gpu/drm/v3d/v3d_performance_counters.h
+++ b/drivers/gpu/drm/v3d/v3d_performance_counters.h
@@ -30,6 +30,12 @@  struct v3d_perfmon_info {
 	 * Array of counters valid for the platform.
 	 */
 	const struct v3d_perf_counter_desc *counters;
+
+	/*
+	 * For some performance analysis tool in user space, we need
+	 * to use one global configured perfmon for all jobs.
+	 */
+	struct v3d_perfmon *global_perfmon;
 };
 
 #endif
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 08d2a2739582..686248d15329 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -120,11 +120,17 @@  v3d_cpu_job_free(struct drm_sched_job *sched_job)
 static void
 v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job)
 {
+	struct v3d_perfmon *perfmon = v3d->perfmon_info.global_perfmon;
+
+	if (!perfmon)
+		perfmon = job->perfmon;
+
 	if (job->perfmon != v3d->active_perfmon)
 		v3d_perfmon_stop(v3d, v3d->active_perfmon, true);
 
-	if (job->perfmon && v3d->active_perfmon != job->perfmon)
-		v3d_perfmon_start(v3d, job->perfmon);
+	if (perfmon && v3d->active_perfmon != perfmon)
+		v3d_perfmon_start(v3d, perfmon);
+
 }
 
 static void
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 87fc5bb0a61e..960d392d75a3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -43,6 +43,7 @@  extern "C" {
 #define DRM_V3D_PERFMON_GET_VALUES                0x0a
 #define DRM_V3D_SUBMIT_CPU                        0x0b
 #define DRM_V3D_PERFMON_GET_COUNTER               0x0c
+#define DRM_V3D_PERFMON_SET_GLOBAL                0x0d
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -61,6 +62,8 @@  extern "C" {
 #define DRM_IOCTL_V3D_SUBMIT_CPU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
 #define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \
 						   struct drm_v3d_perfmon_get_counter)
+#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL  DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \
+						   struct drm_v3d_perfmon_set_global)
 
 #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE             0x01
 #define DRM_V3D_SUBMIT_EXTENSION		  0x02
@@ -765,6 +768,18 @@  struct drm_v3d_perfmon_get_counter {
 	__u8 reserved[7];
 };
 
+/**
+ * struct drm_v3d_perfmon_set_global - ioctl to define a
+ * global performance counter that is used if a job has
+ * not assigned one on its own.
+ */
+
+#define DRM_V3D_PERFMON_CLEAR_GLOBAL    0x0001
+struct drm_v3d_perfmon_set_global {
+	__u32 flags;
+	__u32 id;
+};
+
 #if defined(__cplusplus)
 }
 #endif