diff mbox series

[RFC,1/3] fs/proc: Add gtime halted to proc/<pid>/stat

Message ID 20250218202618.567363-2-sieberf@amazon.com (mailing list archive)
State New
Headers show
Series kvm,sched: Add gtime halted | expand

Commit Message

Fernand Sieber Feb. 18, 2025, 8:26 p.m. UTC
The hypervisor may need visibility into guest CPU activity for various
purposes, such as reporting it to monitoring systems that track the amount
of work done on behalf of a guest.

With guest hlt, pause and mwait passthrough, gtime is not useful since the
guest never tells the hypervisor that it has halted execution. So the reported
guest time is always 100% even when the guest is completely halted.

Add a new concept of guest halted time that allows the hypervisor to keep
track of the number of halted cycles a CPU spends in guest mode.

The value is reported in /proc/<pid>/stat and defaults to zero for
architectures that do not support it.
---
 Documentation/filesystems/proc.rst | 1 +
 fs/proc/array.c                    | 7 ++++++-
 include/linux/sched.h              | 1 +
 include/linux/sched/signal.h       | 1 +
 kernel/exit.c                      | 1 +
 kernel/fork.c                      | 2 +-
 6 files changed, 11 insertions(+), 2 deletions(-)

--
2.43.0




Amazon Development Centre (South Africa) (Proprietary) Limited
29 Gogosoa Street, Observatory, Cape Town, Western Cape, 7925, South Africa
Registration Number: 2004 / 034463 / 07
diff mbox series

Patch

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 09f0aed5a08b..bbb230420fa4 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -386,6 +386,7 @@  It's slow but very precise.
   env_end       address below which program environment is placed
   exit_code     the thread's exit_code in the form reported by the waitpid
 		system call
+  gtime_halted  guest time of the task spent with the cpu halted, in jiffies
   ============= ===============================================================

 The /proc/PID/maps file contains the currently mapped memory regions and
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d6a0369caa93..0788ef0fa710 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -478,7 +478,7 @@  static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	struct mm_struct *mm;
 	unsigned long long start_time;
 	unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
-	u64 cutime, cstime, cgtime, utime, stime, gtime;
+	u64 cutime, cstime, cgtime, utime, stime, gtime, gtime_halted;
 	unsigned long rsslim = 0;
 	unsigned long flags;
 	int exit_code = task->exit_code;
@@ -556,12 +556,14 @@  static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			min_flt = sig->min_flt;
 			maj_flt = sig->maj_flt;
 			gtime = sig->gtime;
+			gtime_halted = sig->gtime_halted;

 			rcu_read_lock();
 			__for_each_thread(sig, t) {
 				min_flt += t->min_flt;
 				maj_flt += t->maj_flt;
 				gtime += task_gtime(t);
+				gtime_halted += t->gtime_halted;
 			}
 			rcu_read_unlock();
 		}
@@ -575,6 +577,7 @@  static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
 		gtime = task_gtime(task);
+		gtime_halted = task->gtime_halted;
 	}

 	/* scale priority and nice values from timeslices to -20..20 */
@@ -662,6 +665,8 @@  static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	else
 		seq_puts(m, " 0");

+	seq_put_decimal_ull(m, " ", nsec_to_clock_t(gtime_halted));
+
 	seq_putc(m, '\n');
 	if (mm)
 		mmput(mm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9632e3318e0d..5f6745357e20 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1087,6 +1087,7 @@  struct task_struct {
 	u64				stimescaled;
 #endif
 	u64				gtime;
+	u64				gtime_halted;
 	struct prev_cputime		prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 	struct vtime			vtime;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index d5d03d919df8..633082f7c7b8 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -187,6 +187,7 @@  struct signal_struct {
 	seqlock_t stats_lock;
 	u64 utime, stime, cutime, cstime;
 	u64 gtime;
+	u64 gtime_halted;
 	u64 cgtime;
 	struct prev_cputime prev_cputime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
diff --git a/kernel/exit.c b/kernel/exit.c
index 3485e5fc499e..ba6efc6900d0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -188,6 +188,7 @@  static void __exit_signal(struct task_struct *tsk)
 	sig->utime += utime;
 	sig->stime += stime;
 	sig->gtime += task_gtime(tsk);
+	sig->gtime_halted += tsk->gtime_halted;
 	sig->min_flt += tsk->min_flt;
 	sig->maj_flt += tsk->maj_flt;
 	sig->nvcsw += tsk->nvcsw;
diff --git a/kernel/fork.c b/kernel/fork.c
index 735405a9c5f3..e3453084bb5a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2296,7 +2296,7 @@  __latent_entropy struct task_struct *copy_process(

 	init_sigpending(&p->pending);

-	p->utime = p->stime = p->gtime = 0;
+	p->utime = p->stime = p->gtime = p->gtime_halted = 0;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 	p->utimescaled = p->stimescaled = 0;
 #endif