diff mbox series

[v2,3/3] vstatus: Display an informational message when the VSTATUS character is pressed or TIOCSTAT ioctl is called.

Message ID 20220206154856.2355838-4-walt@drummond.us (mailing list archive)
State Handled Elsewhere
Headers show
Series vstatus: TTY status message request | expand

Commit Message

Walt Drummond Feb. 6, 2022, 3:48 p.m. UTC
When triggered by pressing the VSTATUS key or calling the TIOCSTAT
ioctl, the n_tty line discipline will display a message on the user's
tty that provides basic information about the system and an
'interesting' process in the current foreground process group, eg:

  load: 0.58  cmd: sleep 744474 [sleeping] 0.36r 0.00u 0.00s 0% 772k

The status message provides:
 - System load average
 - Command name and process id (from the perspective of the session)
 - Scheduler state
 - Total wall-clock run time
 - User space run time
 - System space run time
 - Percentage of on-cpu time
 - Resident set size

The message is only displayed when the tty has the VSTATUS character
set, the local flags ICANON and IEXTEN are enabled and NOKERNINFO is
disabled; it is always displayed when TIOCSTAT is called regardless of
tty settings.

Signed-off-by: Walt Drummond <walt@drummond.us>
---
 drivers/tty/Makefile       |   2 +-
 drivers/tty/n_tty.c        |  34 +++++++
 drivers/tty/n_tty_status.c | 181 +++++++++++++++++++++++++++++++++++++
 drivers/tty/tty_io.c       |   2 +-
 include/linux/tty.h        |   5 +
 5 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 drivers/tty/n_tty_status.c

Comments

Greg KH Feb. 6, 2022, 5:16 p.m. UTC | #1
On Sun, Feb 06, 2022 at 07:48:54AM -0800, Walt Drummond wrote:
> When triggered by pressing the VSTATUS key or calling the TIOCSTAT
> ioctl, the n_tty line discipline will display a message on the user's
> tty that provides basic information about the system and an
> 'interesting' process in the current foreground process group, eg:
> 
>   load: 0.58  cmd: sleep 744474 [sleeping] 0.36r 0.00u 0.00s 0% 772k
> 
> The status message provides:
>  - System load average
>  - Command name and process id (from the perspective of the session)
>  - Scheduler state
>  - Total wall-clock run time
>  - User space run time
>  - System space run time
>  - Percentage of on-cpu time
>  - Resident set size

This should be documented somewhere, and not buried in a changelog text
like this.  Can you also add this information somewhere in the
Documentation/ directory so that people have a hint as to what is going
on here?

> The message is only displayed when the tty has the VSTATUS character
> set, the local flags ICANON and IEXTEN are enabled and NOKERNINFO is
> disabled; it is always displayed when TIOCSTAT is called regardless of
> tty settings.
> 
> Signed-off-by: Walt Drummond <walt@drummond.us>
> ---
>  drivers/tty/Makefile       |   2 +-
>  drivers/tty/n_tty.c        |  34 +++++++
>  drivers/tty/n_tty_status.c | 181 +++++++++++++++++++++++++++++++++++++
>  drivers/tty/tty_io.c       |   2 +-
>  include/linux/tty.h        |   5 +
>  5 files changed, 222 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/tty/n_tty_status.c

Also, any chance for a test to be added so that we can ensure that this
doesn't change over time in ways that confuse/break people?

Is this now a new user/kernel API format that we must preserve
forever?  Can we add/remove items over time as makes sense, or are
programs (not just people) going to parse this?

> 
> diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
> index a2bd75fbaaa4..3539d7ab77e5 100644
> --- a/drivers/tty/Makefile
> +++ b/drivers/tty/Makefile
> @@ -2,7 +2,7 @@
>  obj-$(CONFIG_TTY)		+= tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \
>  				   tty_buffer.o tty_port.o tty_mutex.o \
>  				   tty_ldsem.o tty_baudrate.o tty_jobctrl.o \
> -				   n_null.o
> +				   n_null.o n_tty_status.o
>  obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
>  obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
>  obj-$(CONFIG_AUDIT)		+= tty_audit.o
> diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
> index 64a058a4c63b..fd70efc333d7 100644
> --- a/drivers/tty/n_tty.c
> +++ b/drivers/tty/n_tty.c
> @@ -80,6 +80,7 @@
>  #define ECHO_BLOCK		256
>  #define ECHO_DISCARD_WATERMARK	N_TTY_BUF_SIZE - (ECHO_BLOCK + 32)
>  
> +#define STATUS_LINE_LEN 160   /* tty status line will truncate at this length */

Tabs please.


>  
>  #undef N_TTY_TRACE
>  #ifdef N_TTY_TRACE
> @@ -127,6 +128,8 @@ struct n_tty_data {
>  	struct mutex output_lock;
>  };
>  
> +static void n_tty_status(struct tty_struct *tty);
> +
>  #define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1))
>  
>  static inline size_t read_cnt(struct n_tty_data *ldata)
> @@ -1334,6 +1337,11 @@ static void n_tty_receive_char_special(struct tty_struct *tty, unsigned char c)
>  			commit_echoes(tty);
>  			return;
>  		}
> +		if (c == STATUS_CHAR(tty) && L_IEXTEN(tty)) {
> +			if (!L_NOKERNINFO(tty))
> +				n_tty_status(tty);
> +			return;
> +		}
>  		if (c == '\n') {
>  			if (L_ECHO(tty) || L_ECHONL(tty)) {
>  				echo_char_raw('\n', ldata);
> @@ -1763,6 +1771,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
>  			set_bit(EOF_CHAR(tty), ldata->char_map);
>  			set_bit('\n', ldata->char_map);
>  			set_bit(EOL_CHAR(tty), ldata->char_map);
> +			set_bit(STATUS_CHAR(tty), ldata->char_map);
>  			if (L_IEXTEN(tty)) {
>  				set_bit(WERASE_CHAR(tty), ldata->char_map);
>  				set_bit(LNEXT_CHAR(tty), ldata->char_map);
> @@ -2413,6 +2422,26 @@ static unsigned long inq_canon(struct n_tty_data *ldata)
>  	return nr;
>  }
>  
> +static void n_tty_status(struct tty_struct *tty)
> +{
> +	struct n_tty_data *ldata = tty->disc_data;
> +	char *msg;
> +	size_t len;
> +
> +	msg = kzalloc(STATUS_LINE_LEN, GFP_KERNEL);

Please check for memory failures.

> +
> +	if (ldata->column != 0) {
> +		*msg = '\n';
> +		len = n_tty_get_status(tty, msg + 1, STATUS_LINE_LEN - 1);
> +	} else {
> +		len = n_tty_get_status(tty, msg, STATUS_LINE_LEN);
> +	}
> +
> +	do_n_tty_write(tty, NULL, msg, len);
> +
> +	kfree(msg);
> +}
> +
>  static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
>  		       unsigned int cmd, unsigned long arg)
>  {
> @@ -2430,6 +2459,11 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
>  			retval = read_cnt(ldata);
>  		up_write(&tty->termios_rwsem);
>  		return put_user(retval, (unsigned int __user *) arg);
> +	case TIOCSTAT:
> +		down_read(&tty->termios_rwsem);
> +		n_tty_status(tty);
> +		up_read(&tty->termios_rwsem);
> +		return 0;
>  	default:
>  		return n_tty_ioctl_helper(tty, file, cmd, arg);
>  	}
> diff --git a/drivers/tty/n_tty_status.c b/drivers/tty/n_tty_status.c
> new file mode 100644
> index 000000000000..f0e053651368
> --- /dev/null
> +++ b/drivers/tty/n_tty_status.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-1.0+

We can not take GPL-1.0 code into the kernel anymore, sorry.  Please
consider using a sane license :)


> +/*
> + * n_tty_status.c --- implements VSTATUS and TIOCSTAT from BSD
> + *
> + * Display a basic status message containing information about the
> + * foreground process and system load on the users tty, triggered by
> + * the VSTATUS character or TIOCSTAT. Ex,
> + *
> + *   load: 14.11  cmd: tcsh 19623 [running] 185756.62r 88.00u 17.50s 0% 4260k
> + *
> + */
> +
> +#include <linux/tty.h>
> +#include <linux/mm.h>
> +#include <linux/sched/loadavg.h>
> +#include <linux/sched/mm.h>
> +
> +/* Convert nanoseconds into centiseconds */
> +static inline long ns_to_cs(long l)
> +{
> +	return l / (NSEC_PER_MSEC * 10);
> +
> +}

Unneeded blank line.


> +
> +/* We want the pid from the context of session */
> +static inline pid_t __get_pid(struct task_struct *tsk, struct tty_struct *tty)
> +{
> +	struct pid_namespace *ns;
> +
> +	spin_lock_irq(&tty->ctrl.lock);
> +	ns = ns_of_pid(tty->ctrl.session);
> +	spin_unlock_irq(&tty->ctrl.lock);
> +
> +	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
> +}
> +
> +/* This is the same odd "bitmap" described in
> + * fs/proc/array.c:get_task_state().  Consistency with standard
> + * implementations of VSTATUS requires a different set of state
> + * names.
> + */
> +static const char * const task_state_name_array[] = {
> +	"running",
> +	"sleeping",
> +	"disk sleep",
> +	"stopped",
> +	"tracing stop",
> +	"dead",
> +	"zombie",
> +	"parked",
> +	"idle",
> +};

How often is this going to get out-of-sync?  Should we use a real
enumerated type here?  Put the string somewhere else to keep this only
in one place?


> +
> +static inline const char *get_task_state_name(struct task_struct *tsk)
> +{
> +	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_name_array));

What is this protecting from?  What is going to change that requires
this to be increased?

> +	return task_state_name_array[task_state_index(tsk)];
> +}
> +
> +static inline struct task_struct *compare(struct task_struct *new,
> +					  struct task_struct *old)
> +{
> +	unsigned int ostate, nstate;
> +
> +	if (old == NULL)
> +		return new;
> +
> +	ostate = task_state_index(old);
> +	nstate = task_state_index(new);
> +
> +	if (ostate == nstate) {
> +		if (old->start_time > new->start_time)
> +			return old;
> +		return new;
> +	}
> +
> +	if (ostate < nstate)
> +		return old;
> +
> +	return new;
> +}
> +
> +static struct task_struct *pick_process(struct tty_struct *tty)
> +{
> +	struct task_struct *new, *winner = NULL;
> +
> +	read_lock(&tasklist_lock);
> +	spin_lock_irq(&tty->ctrl.lock);
> +
> +	do_each_pid_task(tty->ctrl.pgrp, PIDTYPE_PGID, new) {
> +		winner = compare(new, winner);
> +	} while_each_pid_task(tty->ctrl.pgrp, PIDTYPE_PGID, new);
> +
> +	spin_unlock_irq(&tty->ctrl.lock);
> +
> +	if (winner)
> +		winner = get_task_struct(winner);
> +
> +	read_unlock(&tasklist_lock);
> +
> +	return winner;
> +}


What are these two functions trying to do?  A comment would be nice to
give us a hint as I am guessing I am going to have to maintain this for
forever :)

> +
> +size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen)
> +{
> +	struct task_struct *p;
> +	struct mm_struct *mm;
> +	struct rusage rusage;
> +	unsigned long loadavg[3];
> +	uint64_t pcpu, cputime, wallclock;
> +	struct timespec64 utime, stime, rtime;
> +	char tname[TASK_COMM_LEN];
> +	unsigned int pid;
> +	char *state;
> +	unsigned long rss = 0;
> +	size_t len = 0;
> +
> +	get_avenrun(loadavg, FIXED_1/200, 0);

Why 200?

> +	len = scnprintf(msg + len, msglen - len, "load: %lu.%02lu  ",
> +			LOAD_INT(loadavg[0]), LOAD_FRAC(loadavg[0]));
> +
> +	if (tty->ctrl.session == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "not a controlling terminal\n");
> +		goto out;
> +	}
> +
> +	if (tty->ctrl.pgrp == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "no foreground process group\n");
> +		goto out;
> +	}
> +
> +	/* Note that if p is refcounted */
> +	p = pick_process(tty);
> +	if (p == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "empty foreground process group\n");
> +		goto out;
> +	}
> +
> +	mm = get_task_mm(p);
> +	if (mm) {
> +		rss = get_mm_rss(mm) * PAGE_SIZE / 1024;
> +		mmput(mm);
> +	}
> +	get_task_comm(tname, p);
> +	getrusage(p, RUSAGE_BOTH, &rusage);
> +	pid = __get_pid(p, tty);
> +	state = (char *) get_task_state_name(p);
> +	wallclock = ktime_get_ns() - p->start_time;
> +	put_task_struct(p);
> +
> +	/* After this point, any of the information we have on p might
> +	 * become stale.  It's OK if the status message is a little bit
> +	 * lossy.
> +	 */
> +
> +	utime.tv_sec = rusage.ru_utime.tv_sec;
> +	utime.tv_nsec = rusage.ru_utime.tv_usec * NSEC_PER_USEC;
> +	stime.tv_sec = rusage.ru_stime.tv_sec;
> +	stime.tv_nsec = rusage.ru_stime.tv_usec * NSEC_PER_USEC;
> +	rtime = ns_to_timespec64(wallclock);
> +
> +	cputime = timespec64_to_ns(&utime) + timespec64_to_ns(&stime);
> +	pcpu = div64_u64(cputime * 100, wallclock);
> +
> +	len += scnprintf(msg + len, msglen - len,
> +			 /* task, PID, task state */
> +			 "cmd: %s %d [%s] "
> +			 /* rtime,    utime,      stime,      %cpu   rss */
> +			 "%llu.%02lur %llu.%02luu %llu.%02lus %llu%% %luk\n",
> +			 tname,	pid, state,
> +			 rtime.tv_sec, ns_to_cs(rtime.tv_nsec),
> +			 utime.tv_sec, ns_to_cs(utime.tv_nsec),
> +			 stime.tv_sec, ns_to_cs(stime.tv_nsec),
> +			 pcpu, rss);
> +
> +out:
> +	return len;
> +}
> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index 6616d4a0d41d..f2f4f48ea502 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -125,7 +125,7 @@ struct ktermios tty_std_termios = {	/* for the benefit of tty drivers  */
>  	.c_oflag = OPOST | ONLCR,
>  	.c_cflag = B38400 | CS8 | CREAD | HUPCL,
>  	.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
> -		   ECHOCTL | ECHOKE | IEXTEN,
> +		   ECHOCTL | ECHOKE | IEXTEN | NOKERNINFO,
>  	.c_cc = INIT_C_CC,
>  	.c_ispeed = 38400,
>  	.c_ospeed = 38400,
> diff --git a/include/linux/tty.h b/include/linux/tty.h
> index cbe5d535a69d..2e483708608c 100644
> --- a/include/linux/tty.h
> +++ b/include/linux/tty.h
> @@ -49,6 +49,7 @@
>  #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE])
>  #define LNEXT_CHAR(tty)	((tty)->termios.c_cc[VLNEXT])
>  #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2])
> +#define STATUS_CHAR(tty) ((tty)->termios.c_cc[VSTATUS])
>  
>  #define _I_FLAG(tty, f)	((tty)->termios.c_iflag & (f))
>  #define _O_FLAG(tty, f)	((tty)->termios.c_oflag & (f))
> @@ -114,6 +115,7 @@
>  #define L_PENDIN(tty)	_L_FLAG((tty), PENDIN)
>  #define L_IEXTEN(tty)	_L_FLAG((tty), IEXTEN)
>  #define L_EXTPROC(tty)	_L_FLAG((tty), EXTPROC)
> +#define L_NOKERNINFO(tty) _L_FLAG((tty), NOKERNINFO)
>  
>  struct device;
>  struct signal_struct;
> @@ -389,6 +391,9 @@ extern void __init n_tty_init(void);
>  static inline void n_tty_init(void) { }
>  #endif
>  
> +/* n_tty_status.c */
> +size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen);

No need for this to be in include/linux/tty.h, put it in the .h file in
drivers/tty/ please.

thanks,

greg k-h
Arseny Maslennikov Feb. 6, 2022, 9:25 p.m. UTC | #2
On Sun, Feb 06, 2022 at 07:48:54AM -0800, Walt Drummond wrote:
> When triggered by pressing the VSTATUS key or calling the TIOCSTAT
> ioctl, the n_tty line discipline will display a message on the user's
> tty that provides basic information about the system and an
> 'interesting' process in the current foreground process group, eg:
> 
>   load: 0.58  cmd: sleep 744474 [sleeping] 0.36r 0.00u 0.00s 0% 772k
> 
> The status message provides:
>  - System load average
>  - Command name and process id (from the perspective of the session)
>  - Scheduler state
>  - Total wall-clock run time
>  - User space run time
>  - System space run time
>  - Percentage of on-cpu time
>  - Resident set size
> 
> The message is only displayed when the tty has the VSTATUS character
> set, the local flags ICANON and IEXTEN are enabled and NOKERNINFO is
> disabled; it is always displayed when TIOCSTAT is called regardless of
> tty settings.
> 
> Signed-off-by: Walt Drummond <walt@drummond.us>
> ---
>  drivers/tty/Makefile       |   2 +-
>  drivers/tty/n_tty.c        |  34 +++++++
>  drivers/tty/n_tty_status.c | 181 +++++++++++++++++++++++++++++++++++++
>  drivers/tty/tty_io.c       |   2 +-
>  include/linux/tty.h        |   5 +
>  5 files changed, 222 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/tty/n_tty_status.c
> 
> <...>
> @@ -2430,6 +2459,11 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
>  			retval = read_cnt(ldata);
>  		up_write(&tty->termios_rwsem);
>  		return put_user(retval, (unsigned int __user *) arg);
> +	case TIOCSTAT:

Perhaps we want to guard this (example pseudocode follows):

		if (*our ldisc is not n_tty*)
			return an error like -ENOTTY;

...since kerninfo is useless for non-UI ttys, e. g. serial device
drivers, and this ioctl could mess them up if this code path can be
taken. (I have not verified this kind of breakage is possible.) Please
see the complete rationale below; this paragraph is an illustrative
note for it.

> +		down_read(&tty->termios_rwsem);
> +		n_tty_status(tty);
> +		up_read(&tty->termios_rwsem);
> +		return 0;
>  	default:
>  		return n_tty_ioctl_helper(tty, file, cmd, arg);
>  	}
> diff --git a/drivers/tty/n_tty_status.c b/drivers/tty/n_tty_status.c
> new file mode 100644
> index 000000000000..f0e053651368
> --- /dev/null
> +++ b/drivers/tty/n_tty_status.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-1.0+
> +/*
> + * n_tty_status.c --- implements VSTATUS and TIOCSTAT from BSD
> + *
> + * Display a basic status message containing information about the
> + * foreground process and system load on the users tty, triggered by
> + * the VSTATUS character or TIOCSTAT. Ex,
> + *
> + *   load: 14.11  cmd: tcsh 19623 [running] 185756.62r 88.00u 17.50s 0% 4260k
> + *
> + */
> +
> +#include <linux/tty.h>
> +#include <linux/mm.h>
> +#include <linux/sched/loadavg.h>
> +#include <linux/sched/mm.h>
> +
> +/* Convert nanoseconds into centiseconds */
> +static inline long ns_to_cs(long l)
> +{
> +	return l / (NSEC_PER_MSEC * 10);
> +
> +}
> +
> +/* We want the pid from the context of session */
> +static inline pid_t __get_pid(struct task_struct *tsk, struct tty_struct *tty)
> +{
> +	struct pid_namespace *ns;
> +
> +	spin_lock_irq(&tty->ctrl.lock);
> +	ns = ns_of_pid(tty->ctrl.session);
> +	spin_unlock_irq(&tty->ctrl.lock);
> +
> +	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
> +}
> +
> +/* This is the same odd "bitmap" described in
> + * fs/proc/array.c:get_task_state().  Consistency with standard
> + * implementations of VSTATUS requires a different set of state
> + * names.

As far as I can remember, VSTATUS is not subject to any standard, so no
implementation is *standard* by any means. The 2 most popular libre &
open source BSD derivatives implement the VSTATUS message with different
details (e. g. OpenBSD does not check for column 0, FreeBSD does) and
use different message formats.
We are not obliged to copy the message format or the task state names
from another system (which most likely uses a different set of task
states, or might change its task state set independently of Linux),
especially since the message is not part of any API and is not even
readable by processes who read or write on the terminal.

(If the terminal is a pty, then there is a user process which has
possession of an fd to the pty master, but anyway it can not — and
should not — distinguish between terminal output produced by processes
or by the ldisc.)

> + */
> +static const char * const task_state_name_array[] = {
> +	"running",
> +	"sleeping",
> +	"disk sleep",
> +	"stopped",
> +	"tracing stop",
> +	"dead",
> +	"zombie",
> +	"parked",
> +	"idle",
> +};
> +
> +static inline const char *get_task_state_name(struct task_struct *tsk)
> +{
> +	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_name_array));
> +	return task_state_name_array[task_state_index(tsk)];
> +}
> +
> <...>
> +size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen)
> +{
> +	struct task_struct *p;
> +	struct mm_struct *mm;
> +	struct rusage rusage;
> +	unsigned long loadavg[3];
> +	uint64_t pcpu, cputime, wallclock;
> +	struct timespec64 utime, stime, rtime;
> +	char tname[TASK_COMM_LEN];
> +	unsigned int pid;
> +	char *state;
> +	unsigned long rss = 0;
> +	size_t len = 0;
> +
> +	get_avenrun(loadavg, FIXED_1/200, 0);
> +	len = scnprintf(msg + len, msglen - len, "load: %lu.%02lu  ",
> +			LOAD_INT(loadavg[0]), LOAD_FRAC(loadavg[0]));
> +
> +	if (tty->ctrl.session == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "not a controlling terminal\n");
> +		goto out;
> +	}
> +
> +	if (tty->ctrl.pgrp == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "no foreground process group\n");
> +		goto out;
> +	}
> +
> +	/* Note that if p is refcounted */
> +	p = pick_process(tty);
> +	if (p == NULL) {
> +		len += scnprintf(msg + len, msglen - len,
> +				 "empty foreground process group\n");
> +		goto out;
> +	}
> +
> +	mm = get_task_mm(p);
> +	if (mm) {
> +		rss = get_mm_rss(mm) * PAGE_SIZE / 1024;
> +		mmput(mm);
> +	}
> +	get_task_comm(tname, p);
> +	getrusage(p, RUSAGE_BOTH, &rusage);
> +	pid = __get_pid(p, tty);
> +	state = (char *) get_task_state_name(p);
> +	wallclock = ktime_get_ns() - p->start_time;
> +	put_task_struct(p);
> +
> +	/* After this point, any of the information we have on p might
> +	 * become stale.  It's OK if the status message is a little bit
> +	 * lossy.
> +	 */

...By the moment the user sees the status message, the presented
information is a bit stale anyway, but still relevant. :)

> +
> +	utime.tv_sec = rusage.ru_utime.tv_sec;
> +	utime.tv_nsec = rusage.ru_utime.tv_usec * NSEC_PER_USEC;
> +	stime.tv_sec = rusage.ru_stime.tv_sec;
> +	stime.tv_nsec = rusage.ru_stime.tv_usec * NSEC_PER_USEC;
> +	rtime = ns_to_timespec64(wallclock);
> +
> +	cputime = timespec64_to_ns(&utime) + timespec64_to_ns(&stime);
> +	pcpu = div64_u64(cputime * 100, wallclock);

Other reviewers have mentioned that this number does not make too much
sense, as we can see the dividend and the divisor in the message. It
would make more sense to display CPU consumption by the process in a
recent enough time window, or some other "hogginess" estimate, but I
doubt this information is available.

> +
> +	len += scnprintf(msg + len, msglen - len,
> +			 /* task, PID, task state */
> +			 "cmd: %s %d [%s] "
> +			 /* rtime,    utime,      stime,      %cpu   rss */
> +			 "%llu.%02lur %llu.%02luu %llu.%02lus %llu%% %luk\n",
> +			 tname,	pid, state,
> +			 rtime.tv_sec, ns_to_cs(rtime.tv_nsec),
> +			 utime.tv_sec, ns_to_cs(utime.tv_nsec),
> +			 stime.tv_sec, ns_to_cs(stime.tv_nsec),
> +			 pcpu, rss);
> +
> +out:
> +	return len;
> +}
> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index 6616d4a0d41d..f2f4f48ea502 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -125,7 +125,7 @@ struct ktermios tty_std_termios = {	/* for the benefit of tty drivers  */
>  	.c_oflag = OPOST | ONLCR,
>  	.c_cflag = B38400 | CS8 | CREAD | HUPCL,
>  	.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
> -		   ECHOCTL | ECHOKE | IEXTEN,
> +		   ECHOCTL | ECHOKE | IEXTEN | NOKERNINFO,

Does this mean that nokerninfo is on by default? Do we have a reason to
do that?

As of this patch we require icanon and iexten to be set for the message
to be composed and printed. An experiment shows PTY encapsulation
programs like openssh turn off both those flags on the tty they run on
before they take control (contrary to what has been said in LWN), so
they are unimpacted.

The termios(3) page from man-pages states:
   Raw mode
       cfmakeraw() sets the terminal to something like the "raw"  mode
       of  the old Version 7 terminal driver: input is available char‐
       acter by character, echoing is disabled, and all  special  pro‐
       cessing  of  terminal  input and output characters is disabled.
       The terminal attributes are set as follows:

           termios_p->c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
                           | INLCR | IGNCR | ICRNL | IXON);
           termios_p->c_oflag &= ~OPOST;
           termios_p->c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN);
           termios_p->c_cflag &= ~(CSIZE | PARENB);
           termios_p->c_cflag |= CS8;

So any program which uses this API effectively turns off kerninfo as
implemented here.

There are 2 ways n_tty_status() can be called as of this patch: either
from inside n_tty or via TIOCSTAT. The first path can't be taken on ttys
whose ldisc is not N_TTY, the second path we can fix as proposed in the
comment to the TIOCSTAT hunk if needed. IOW, we can make this safe for
device drivers.

Given all this, is there any other reason to enable nokerninfo (i. e.
disable status message) by default?

>  	.c_cc = INIT_C_CC,
>  	.c_ispeed = 38400,
>  	.c_ospeed = 38400,
> diff --git a/include/linux/tty.h b/include/linux/tty.h
> index cbe5d535a69d..2e483708608c 100644
> --- a/include/linux/tty.h
> +++ b/include/linux/tty.h
> @@ -49,6 +49,7 @@
>  #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE])
>  #define LNEXT_CHAR(tty)	((tty)->termios.c_cc[VLNEXT])
>  #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2])
> +#define STATUS_CHAR(tty) ((tty)->termios.c_cc[VSTATUS])
>  
>  #define _I_FLAG(tty, f)	((tty)->termios.c_iflag & (f))
>  #define _O_FLAG(tty, f)	((tty)->termios.c_oflag & (f))
> @@ -114,6 +115,7 @@
>  #define L_PENDIN(tty)	_L_FLAG((tty), PENDIN)
>  #define L_IEXTEN(tty)	_L_FLAG((tty), IEXTEN)
>  #define L_EXTPROC(tty)	_L_FLAG((tty), EXTPROC)
> +#define L_NOKERNINFO(tty) _L_FLAG((tty), NOKERNINFO)
>  
>  struct device;
>  struct signal_struct;
> @@ -389,6 +391,9 @@ extern void __init n_tty_init(void);
>  static inline void n_tty_init(void) { }
>  #endif
>  
> +/* n_tty_status.c */
> +size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen);
> +
>  /* tty_audit.c */
>  #ifdef CONFIG_AUDIT
>  extern void tty_audit_exit(void);
> -- 
> 2.30.2

Thanks!
Arseny Maslennikov Feb. 6, 2022, 10:44 p.m. UTC | #3
On Mon, Feb 07, 2022 at 12:25:21AM +0300, Arseny Maslennikov wrote:
> On Sun, Feb 06, 2022 at 07:48:54AM -0800, Walt Drummond wrote:
> > @@ -2430,6 +2459,11 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
> >  			retval = read_cnt(ldata);
> >  		up_write(&tty->termios_rwsem);
> >  		return put_user(retval, (unsigned int __user *) arg);
> > +	case TIOCSTAT:
> 
> Perhaps we want to guard this (example pseudocode follows):
> 
> 		if (*our ldisc is not n_tty*)
> 			return an error like -ENOTTY;
> 
> ...since kerninfo is useless for non-UI ttys, e. g. serial device
> drivers, and this ioctl could mess them up if this code path can be
> taken. (I have not verified this kind of breakage is possible.) Please
> see the complete rationale below, this paragraph is an illustrational
> note for it.

Oh wait, this *is* n_tty_ioctl(), so the ioctl is n_tty-specific. This
makes the case below even clearer.
I've been clumsy, sorry about that.

> > diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> > index 6616d4a0d41d..f2f4f48ea502 100644
> > --- a/drivers/tty/tty_io.c
> > +++ b/drivers/tty/tty_io.c
> > @@ -125,7 +125,7 @@ struct ktermios tty_std_termios = {	/* for the benefit of tty drivers  */
> >  	.c_oflag = OPOST | ONLCR,
> >  	.c_cflag = B38400 | CS8 | CREAD | HUPCL,
> >  	.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
> > -		   ECHOCTL | ECHOKE | IEXTEN,
> > +		   ECHOCTL | ECHOKE | IEXTEN | NOKERNINFO,
> 
> Does this mean that nokerninfo is on by default? Do we have a reason to
> do that?
> 
> As of this patch we require icanon and iexten to be set for the message
> to be composed and printed. An experiment shows PTY encapsulation
> programs like openssh turn off both those flags on the tty they run on
> before they take control (contrary to what has been said in LWN), so
> they are unimpacted.
> 
> The termios(3) page from man-pages states:
>    Raw mode
>        cfmakeraw() sets the terminal to something like the "raw"  mode
>        of  the old Version 7 terminal driver: input is available char‐
>        acter by character, echoing is disabled, and all  special  pro‐
>        cessing  of  terminal  input and output characters is disabled.
>        The terminal attributes are set as follows:
> 
>            termios_p->c_iflag &= ~(IGNBRK | BRKINT | PARMRK | ISTRIP
>                            | INLCR | IGNCR | ICRNL | IXON);
>            termios_p->c_oflag &= ~OPOST;
>            termios_p->c_lflag &= ~(ECHO | ECHONL | ICANON | ISIG | IEXTEN);
>            termios_p->c_cflag &= ~(CSIZE | PARENB);
>            termios_p->c_cflag |= CS8;
> 
> So any program which uses this API effectively turns off kerninfo as
> implemented here.
> 
> There are 2 ways n_tty_status() can be called as of this patch: either
> from inside n_tty or via TIOCSTAT. The first path can't be taken on ttys
> whose ldisc is not N_TTY, ...

The second path is OK as well.

> Given all this, is there any other reason to enable nokerninfo (i. e.
> disable status message) by default?
Jiri Slaby Feb. 7, 2022, 5:38 a.m. UTC | #4
On 06. 02. 22, 16:48, Walt Drummond wrote:
> When triggered by pressing the VSTATUS key or calling the TIOCSTAT
> ioctl, the n_tty line discipline will display a message on the user's
> tty that provides basic information about the system and an
> 'interesting' process in the current foreground process group, eg:
> 
>    load: 0.58  cmd: sleep 744474 [sleeping] 0.36r 0.00u 0.00s 0% 772k
> 
> The status message provides:
>   - System load average
>   - Command name and process id (from the perspective of the session)
>   - Scheduler state
>   - Total wall-clock run time
>   - User space run time
>   - System space run time
>   - Percentage of on-cpu time
>   - Resident set size
> 
> The message is only displayed when the tty has the VSTATUS character
> set, the local flags ICANON and IEXTEN are enabled and NOKERNINFO is
> disabled; it is always displayed when TIOCSTAT is called regardless of
> tty settings.
> 
> Signed-off-by: Walt Drummond <walt@drummond.us>
> ---

It looks like my comments were addressed. However, you did not document 
the changes since v1 here. IOW, [v2] tag missing here.

And please add the CCs I added last time, so that relevant people still 
can comment.

thanks,
diff mbox series

Patch

diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
index a2bd75fbaaa4..3539d7ab77e5 100644
--- a/drivers/tty/Makefile
+++ b/drivers/tty/Makefile
@@ -2,7 +2,7 @@ 
 obj-$(CONFIG_TTY)		+= tty_io.o n_tty.o tty_ioctl.o tty_ldisc.o \
 				   tty_buffer.o tty_port.o tty_mutex.o \
 				   tty_ldsem.o tty_baudrate.o tty_jobctrl.o \
-				   n_null.o
+				   n_null.o n_tty_status.o
 obj-$(CONFIG_LEGACY_PTYS)	+= pty.o
 obj-$(CONFIG_UNIX98_PTYS)	+= pty.o
 obj-$(CONFIG_AUDIT)		+= tty_audit.o
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 64a058a4c63b..fd70efc333d7 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -80,6 +80,7 @@ 
 #define ECHO_BLOCK		256
 #define ECHO_DISCARD_WATERMARK	N_TTY_BUF_SIZE - (ECHO_BLOCK + 32)
 
+#define STATUS_LINE_LEN 160   /* tty status line will truncate at this length */
 
 #undef N_TTY_TRACE
 #ifdef N_TTY_TRACE
@@ -127,6 +128,8 @@  struct n_tty_data {
 	struct mutex output_lock;
 };
 
+static void n_tty_status(struct tty_struct *tty);
+
 #define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1))
 
 static inline size_t read_cnt(struct n_tty_data *ldata)
@@ -1334,6 +1337,11 @@  static void n_tty_receive_char_special(struct tty_struct *tty, unsigned char c)
 			commit_echoes(tty);
 			return;
 		}
+		if (c == STATUS_CHAR(tty) && L_IEXTEN(tty)) {
+			if (!L_NOKERNINFO(tty))
+				n_tty_status(tty);
+			return;
+		}
 		if (c == '\n') {
 			if (L_ECHO(tty) || L_ECHONL(tty)) {
 				echo_char_raw('\n', ldata);
@@ -1763,6 +1771,7 @@  static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 			set_bit(EOF_CHAR(tty), ldata->char_map);
 			set_bit('\n', ldata->char_map);
 			set_bit(EOL_CHAR(tty), ldata->char_map);
+			set_bit(STATUS_CHAR(tty), ldata->char_map);
 			if (L_IEXTEN(tty)) {
 				set_bit(WERASE_CHAR(tty), ldata->char_map);
 				set_bit(LNEXT_CHAR(tty), ldata->char_map);
@@ -2413,6 +2422,26 @@  static unsigned long inq_canon(struct n_tty_data *ldata)
 	return nr;
 }
 
+static void n_tty_status(struct tty_struct *tty)
+{
+	struct n_tty_data *ldata = tty->disc_data;
+	char *msg;
+	size_t len;
+
+	msg = kzalloc(STATUS_LINE_LEN, GFP_KERNEL);
+
+	if (ldata->column != 0) {
+		*msg = '\n';
+		len = n_tty_get_status(tty, msg + 1, STATUS_LINE_LEN - 1);
+	} else {
+		len = n_tty_get_status(tty, msg, STATUS_LINE_LEN);
+	}
+
+	do_n_tty_write(tty, NULL, msg, len);
+
+	kfree(msg);
+}
+
 static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
@@ -2430,6 +2459,11 @@  static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 			retval = read_cnt(ldata);
 		up_write(&tty->termios_rwsem);
 		return put_user(retval, (unsigned int __user *) arg);
+	case TIOCSTAT:
+		down_read(&tty->termios_rwsem);
+		n_tty_status(tty);
+		up_read(&tty->termios_rwsem);
+		return 0;
 	default:
 		return n_tty_ioctl_helper(tty, file, cmd, arg);
 	}
diff --git a/drivers/tty/n_tty_status.c b/drivers/tty/n_tty_status.c
new file mode 100644
index 000000000000..f0e053651368
--- /dev/null
+++ b/drivers/tty/n_tty_status.c
@@ -0,0 +1,181 @@ 
+// SPDX-License-Identifier: GPL-1.0+
+/*
+ * n_tty_status.c --- implements VSTATUS and TIOCSTAT from BSD
+ *
+ * Display a basic status message containing information about the
+ * foreground process and system load on the user's tty, triggered by
+ * the VSTATUS character or TIOCSTAT, e.g.:
+ *
+ *   load: 14.11  cmd: tcsh 19623 [running] 185756.62r 88.00u 17.50s 0% 4260k
+ *
+ */
+
+#include <linux/tty.h>
+#include <linux/mm.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/mm.h>
+
+/* Scale a nanosecond count down to centiseconds (units of 10 ms) */
+static inline long ns_to_cs(long l)
+{
+	const long ns_per_cs = 10 * NSEC_PER_MSEC;
+	return l / ns_per_cs;
+}
+
+/* Return tsk's pid as seen from the pid namespace of the tty's session */
+static inline pid_t __get_pid(struct task_struct *tsk, struct tty_struct *tty)
+{
+	struct pid_namespace *session_ns;
+
+	/* tty->ctrl.session is read with ctrl.lock held */
+	spin_lock_irq(&tty->ctrl.lock);
+	session_ns = ns_of_pid(tty->ctrl.session);
+	spin_unlock_irq(&tty->ctrl.lock);
+	return __task_pid_nr_ns(tsk, PIDTYPE_PID, session_ns);
+}
+
+/* State names, indexed by task_state_index().  Same odd "bitmap" as
+ * described in fs/proc/array.c:get_task_state(), but consistency with
+ * standard implementations of VSTATUS requires a different set of names.
+ */
+static const char * const task_state_name_array[] = {
+	"running",
+	"sleeping",
+	"disk sleep",
+	"stopped",
+	"tracing stop",
+	"dead",
+	"zombie",
+	"parked",
+	"idle",
+};
+
+static inline const char *get_task_state_name(struct task_struct *tsk)
+{
+	/* The table must hold exactly 1 + ilog2(TASK_REPORT_MAX) names */
+	BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_name_array));
+	return task_state_name_array[task_state_index(tsk)];
+}
+
+/*
+ * Choose the more "interesting" of two tasks: the one with the lower
+ * task_state_index(), or on equal state the one with the later start_time.
+ */
+static inline struct task_struct *compare(struct task_struct *new,
+					  struct task_struct *old)
+{
+	unsigned int old_idx, new_idx;
+
+	if (!old)
+		return new;
+
+	old_idx = task_state_index(old);
+	new_idx = task_state_index(new);
+
+	/* Different states: the smaller index wins */
+	if (old_idx != new_idx)
+		return old_idx < new_idx ? old : new;
+
+	/* Same state: old wins only if it started strictly later than new */
+	return old->start_time > new->start_time ? old : new;
+}
+
+/* Return the "winning" task of the tty's foreground process group with a
+ * task reference held, or NULL if the group has no tasks.
+ */
+static struct task_struct *pick_process(struct tty_struct *tty)
+{
+	struct task_struct *tsk, *best = NULL;
+
+	read_lock(&tasklist_lock);
+	spin_lock_irq(&tty->ctrl.lock);
+	do_each_pid_task(tty->ctrl.pgrp, PIDTYPE_PGID, tsk) {
+		best = compare(tsk, best);
+	} while_each_pid_task(tty->ctrl.pgrp, PIDTYPE_PGID, tsk);
+	spin_unlock_irq(&tty->ctrl.lock);
+
+	if (best)
+		get_task_struct(best);
+	read_unlock(&tasklist_lock);
+
+	return best;
+}
+
+/* Format the status line for @tty into @msg, writing at most @msglen
+ * bytes.  Returns the number of characters placed in @msg.
+ */
+size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen)
+{
+	struct task_struct *p;
+	struct mm_struct *mm;
+	struct rusage rusage;
+	unsigned long loadavg[3];
+	uint64_t pcpu, cputime, wallclock;
+	struct timespec64 utime, stime, rtime;
+	char tname[TASK_COMM_LEN];
+	pid_t pid;
+	const char *state;
+	unsigned long rss = 0;
+	size_t len = 0;
+
+	get_avenrun(loadavg, FIXED_1/200, 0);
+	len = scnprintf(msg + len, msglen - len, "load: %lu.%02lu  ",
+			LOAD_INT(loadavg[0]), LOAD_FRAC(loadavg[0]));
+
+	if (tty->ctrl.session == NULL) {
+		len += scnprintf(msg + len, msglen - len,
+				 "not a controlling terminal\n");
+		goto out;
+	}
+
+	if (tty->ctrl.pgrp == NULL) {
+		len += scnprintf(msg + len, msglen - len,
+				 "no foreground process group\n");
+		goto out;
+	}
+
+	/* Note that p is refcounted; the reference is dropped below */
+	p = pick_process(tty);
+	if (p == NULL) {
+		len += scnprintf(msg + len, msglen - len,
+				 "empty foreground process group\n");
+		goto out;
+	}
+
+	mm = get_task_mm(p);
+	if (mm) {
+		rss = get_mm_rss(mm) * PAGE_SIZE / 1024;
+		mmput(mm);
+	}
+	get_task_comm(tname, p);
+	getrusage(p, RUSAGE_BOTH, &rusage);
+	pid = __get_pid(p, tty);
+	state = get_task_state_name(p);
+	wallclock = ktime_get_ns() - p->start_time;
+	put_task_struct(p);
+
+	/* What we sampled from p may now go stale; a lossy message is OK */
+	utime.tv_sec = rusage.ru_utime.tv_sec;
+	utime.tv_nsec = rusage.ru_utime.tv_usec * NSEC_PER_USEC;
+	stime.tv_sec = rusage.ru_stime.tv_sec;
+	stime.tv_nsec = rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+	rtime = ns_to_timespec64(wallclock);
+
+	/* Guard the division: wallclock is 0 if no time has elapsed yet */
+	cputime = timespec64_to_ns(&utime) + timespec64_to_ns(&stime);
+	pcpu = wallclock ? div64_u64(cputime * 100, wallclock) : 0;
+
+	len += scnprintf(msg + len, msglen - len,
+			 /* task, PID, task state */
+			 "cmd: %s %d [%s] "
+			 /* rtime,    utime,      stime,      %cpu   rss */
+			 "%lld.%02ldr %lld.%02ldu %lld.%02lds %llu%% %luk\n",
+			 tname,	pid, state,
+			 rtime.tv_sec, ns_to_cs(rtime.tv_nsec),
+			 utime.tv_sec, ns_to_cs(utime.tv_nsec),
+			 stime.tv_sec, ns_to_cs(stime.tv_nsec),
+			 pcpu, rss);
+
+out:
+	return len;
+}
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 6616d4a0d41d..f2f4f48ea502 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -125,7 +125,7 @@  struct ktermios tty_std_termios = {	/* for the benefit of tty drivers  */
 	.c_oflag = OPOST | ONLCR,
 	.c_cflag = B38400 | CS8 | CREAD | HUPCL,
 	.c_lflag = ISIG | ICANON | ECHO | ECHOE | ECHOK |
-		   ECHOCTL | ECHOKE | IEXTEN,
+		   ECHOCTL | ECHOKE | IEXTEN | NOKERNINFO,
 	.c_cc = INIT_C_CC,
 	.c_ispeed = 38400,
 	.c_ospeed = 38400,
diff --git a/include/linux/tty.h b/include/linux/tty.h
index cbe5d535a69d..2e483708608c 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -49,6 +49,7 @@ 
 #define WERASE_CHAR(tty) ((tty)->termios.c_cc[VWERASE])
 #define LNEXT_CHAR(tty)	((tty)->termios.c_cc[VLNEXT])
 #define EOL2_CHAR(tty) ((tty)->termios.c_cc[VEOL2])
+#define STATUS_CHAR(tty) ((tty)->termios.c_cc[VSTATUS])
 
 #define _I_FLAG(tty, f)	((tty)->termios.c_iflag & (f))
 #define _O_FLAG(tty, f)	((tty)->termios.c_oflag & (f))
@@ -114,6 +115,7 @@ 
 #define L_PENDIN(tty)	_L_FLAG((tty), PENDIN)
 #define L_IEXTEN(tty)	_L_FLAG((tty), IEXTEN)
 #define L_EXTPROC(tty)	_L_FLAG((tty), EXTPROC)
+#define L_NOKERNINFO(tty) _L_FLAG((tty), NOKERNINFO)
 
 struct device;
 struct signal_struct;
@@ -389,6 +391,9 @@  extern void __init n_tty_init(void);
 static inline void n_tty_init(void) { }
 #endif
 
+/* n_tty_status.c */
+size_t n_tty_get_status(struct tty_struct *tty, char *msg, size_t msglen);
+
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
 extern void tty_audit_exit(void);