
[RFC,8/8] cgroup/drm: Expose GPU utilisation

Message ID 20231024160727.282960-9-tvrtko.ursulin@linux.intel.com
State New, archived
Series DRM scheduling cgroup controller

Commit Message

Tvrtko Ursulin Oct. 24, 2023, 4:07 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

To support container use cases where external orchestrators want to make
deployment and migration decisions based on GPU load and capacity, we can
expose the GPU load as seen by the controller in a new drm.stat file, via
its usage_usec field. This field contains the monotonic cumulative time
the cgroup (including its child groups) has spent executing GPU loads, as
reported by the DRM drivers used by group members.
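
For illustration, a minimal sketch of a consumer reading the new file.
The cgroup2 mount point and the "mycontainer" group name below are
assumptions, not something this patch mandates:

  /* Read the cumulative GPU time of one cgroup (sketch). */
  #include <inttypes.h>
  #include <stdio.h>

  int main(void)
  {
      FILE *f = fopen("/sys/fs/cgroup/mycontainer/drm.stat", "r");
      char key[32];
      uint64_t usage_usec;

      if (!f)
          return 1;

      /* Flat-keyed format: "usage_usec <value>\n". */
      if (fscanf(f, "%31s %" SCNu64, key, &usage_usec) == 2)
          printf("%s: %" PRIu64 " us\n", key, usage_usec);

      fclose(f);
      return 0;
  }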

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
---
 Documentation/admin-guide/cgroup-v2.rst |  8 +++++++
 kernel/cgroup/drm.c                     | 29 ++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

Patch

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 841533527b7b..9ac8ab65161c 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2445,6 +2445,14 @@ respected.
 DRM weight based time control interface files
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+  drm.stat
+	A read-only flat-keyed file.
+
+	Contains these fields:
+
+	- usage_usec - GPU time used by the group, recursively including all
+		       child groups.
+
   drm.weight
 	Standard cgroup weight based control [1, 10000] used to configure the
 	relative distribution of GPU time between the sibling groups.
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 1d1570bf3e90..127730990301 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -25,6 +25,8 @@ struct drm_cgroup_state {
 	bool over;
 	bool over_budget;
 
+	u64 total_us;
+
 	u64 per_s_budget_us;
 	u64 prev_active_us;
 	u64 active_us;
@@ -117,6 +119,24 @@ drmcs_write_weight(struct cgroup_subsys_state *css, struct cftype *cftype,
 	return 0;
 }
 
+static int drmcs_show_stat(struct seq_file *sf, void *v)
+{
+	struct drm_cgroup_state *drmcs = css_to_drmcs(seq_css(sf));
+	u64 val;
+
+#ifndef CONFIG_64BIT
+	mutex_lock(&drmcg_mutex);
+#endif
+	val = drmcs->total_us;
+#ifndef CONFIG_64BIT
+	mutex_unlock(&drmcg_mutex);
+#endif
+
+	seq_printf(sf, "usage_usec %llu\n", val);
+
+	return 0;
+}
+
 static bool __start_scanning(unsigned int period_us)
 {
 	struct drm_cgroup_state *root = &root_drmcs.drmcs;
@@ -169,11 +189,14 @@ static bool __start_scanning(unsigned int period_us)
 		parent = css_to_drmcs(node->parent);
 
 		active = drmcs_get_active_time_us(drmcs);
-		if (period_us && active > drmcs->prev_active_us)
+		if (period_us && active > drmcs->prev_active_us) {
 			drmcs->active_us += active - drmcs->prev_active_us;
+			drmcs->total_us += drmcs->active_us;
+		}
 		drmcs->prev_active_us = active;
 
 		parent->active_us += drmcs->active_us;
+		parent->total_us += drmcs->active_us;
 		parent->sum_children_weights += drmcs->weight;
 
 		css_put(node);
@@ -564,6 +587,10 @@ struct cftype files[] = {
 		.read_u64 = drmcs_read_weight,
 		.write_u64 = drmcs_write_weight,
 	},
+	{
+		.name = "stat",
+		.seq_show = drmcs_show_stat,
+	},
 	{ } /* Zero entry terminates. */
 };