diff mbox series

[v3] proc,fcntl: introduce F_SET_DESCRIPTION

Message ID 20200725052236.4062-1-kalou@tfz.net (mailing list archive)
State New, archived
Headers show
Series [v3] proc,fcntl: introduce F_SET_DESCRIPTION | expand

Commit Message

Pascal Bouchareine July 25, 2020, 5:22 a.m. UTC
This command attaches a description to a file descriptor for
troubleshooting purposes. The free string is displayed in the
process fdinfo file for that fd /proc/pid/fdinfo/fd.

One intended usage is to allow processes to self-document sockets
for netstat and friends to report

Signed-off-by: Pascal Bouchareine <kalou@tfz.net>
---
 Documentation/filesystems/proc.rst |  3 +++
 fs/fcntl.c                         | 19 +++++++++++++++++++
 fs/file_table.c                    |  2 ++
 fs/proc/fd.c                       |  5 +++++
 include/linux/fs.h                 |  3 +++
 include/uapi/linux/fcntl.h         |  5 +++++
 6 files changed, 37 insertions(+)

Comments

Alexey Dobriyan July 27, 2020, 2:21 p.m. UTC | #1
On Fri, Jul 24, 2020 at 10:22:36PM -0700, Pascal Bouchareine wrote:
> This command attaches a description to a file descriptor for
> troubleshooting purposes. The free string is displayed in the
> process fdinfo file for that fd /proc/pid/fdinfo/fd.
> 
> One intended usage is to allow processes to self-document sockets
> for netstat and friends to report

> +static long fcntl_set_description(struct file *file, char __user *desc)
> +{
> +	char *d;
> +
> +	d = strndup_user(desc, MAX_FILE_DESC_SIZE);

This should be kmem accounted because allocation is persistent.
To make things more entertaining, strndup_user() doesn't have gfp_t argument.

> +	if (IS_ERR(d))
> +		return PTR_ERR(d);
> +
> +	spin_lock(&file->f_lock);
> +	kfree(file->f_description);
> +	file->f_description = d;
> +	spin_unlock(&file->f_lock);

Generally kfree under spinlock is not good idea.
You can replace the pointer and free without spinlock.

> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -980,6 +980,9 @@ struct file {
>  	struct address_space	*f_mapping;
>  	errseq_t		f_wb_err;
>  	errseq_t		f_sb_err; /* for syncfs */
> +
> +#define MAX_FILE_DESC_SIZE 256
> +	char                    *f_description;

struct file is nicely aligned to 256 bytes on distro configs.
Will this break everything?

	$ cat /sys/kernel/slab/filp/object_size
Pascal Bouchareine July 28, 2020, 1:39 a.m. UTC | #2
Thanks for reviewing. I have added some questions inline below.

On Mon, Jul 27, 2020 at 7:21 AM Alexey Dobriyan <adobriyan@gmail.com> wrote:
> > +     d = strndup_user(desc, MAX_FILE_DESC_SIZE);
>
> This should be kmem accounted because allocation is persistent.
> To make things more entertaining, strndup_user() doesn't have gfp_t argument.

I had to look this up, so I may be well off the mark: is that
__GFP_ACCOUNT, and would it be correct to assume memdup_user() should
use it unconditionally?

Otherwise, my simple option would be to implement that logic directly in
fcntl_set_description(), but the benefit would be very local.

Please let me know what you think is best. I am happy to read more
documentation if there is a more productive way to address this.

>
> > +     if (IS_ERR(d))
> > +             return PTR_ERR(d);
> > +
> > +     spin_lock(&file->f_lock);
> > +     kfree(file->f_description);
> > +     file->f_description = d;
> > +     spin_unlock(&file->f_lock);
>
> Generally kfree under spinlock is not good idea.
> You can replace the pointer and free without spinlock.

Sorry, I also need some pointers here: are you suggesting that I move the
kfree out of the locked section, or that there is a safe alternative to
the spinlock?

> struct file is nicely aligned to 256 bytes on distro configs.
> Will this break everything?
>
>         $ cat /sys/kernel/slab/filp/object_size

Indeed on the config I'm using here this jumped to 264 bytes

Would it be better to move this to the inode struct? I don't know the
implications of this - any other option?

Thanks!
Pascal Bouchareine July 28, 2020, 6:25 a.m. UTC | #3
On Mon, Jul 27, 2020 at 6:39 PM Pascal Bouchareine <kalou@tfz.net> wrote:
> > struct file is nicely aligned to 256 bytes on distro configs.
> > Will this break everything?
> >
> >         $ cat /sys/kernel/slab/filp/object_size
>
> Indeed on the config I'm using here this jumped to 264 bytes
>
> Would it be better to move this to the inode struct? I don't know the
> implications of this - any other option?

Well, it doesn't actually make much sense to share that at that level.

However, the 264 above comes from a 5.4 build, which does not have f_sb_err,
so I think master has already gone past 256 bytes too?
diff mbox series

Patch

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 996f3cfe7030..ae8045650836 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -1918,6 +1918,9 @@  A typical output is::
 	flags:	0100002
 	mnt_id:	19
 
+An optional 'desc' line is shown if the process has documented its use of
+the file via the fcntl command F_SET_DESCRIPTION.
+
 All locks associated with a file descriptor are shown in its fdinfo too::
 
     lock:       1: FLOCK  ADVISORY  WRITE 359 00:13:11691 0 EOF
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2e4c0fa2074b..c1ef724a906e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -319,6 +319,22 @@  static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 	}
 }
 
+static long fcntl_set_description(struct file *file, char __user *desc)
+{
+	char *d;
+
+	d = strndup_user(desc, MAX_FILE_DESC_SIZE);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	spin_lock(&file->f_lock);
+	kfree(file->f_description);
+	file->f_description = d;
+	spin_unlock(&file->f_lock);
+
+	return 0;
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -426,6 +442,9 @@  static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_SET_FILE_RW_HINT:
 		err = fcntl_rw_hint(filp, cmd, arg);
 		break;
+	case F_SET_DESCRIPTION:
+		err = fcntl_set_description(filp, argp);
+		break;
 	default:
 		break;
 	}
diff --git a/fs/file_table.c b/fs/file_table.c
index 656647f9575a..6673a48d2ea1 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -272,6 +272,8 @@  static void __fput(struct file *file)
 	eventpoll_release(file);
 	locks_remove_file(file);
 
+	kfree(file->f_description);
+
 	ima_file_free(file);
 	if (unlikely(file->f_flags & FASYNC)) {
 		if (file->f_op->fasync)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 81882a13212d..60b3ff971b2b 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -57,6 +57,11 @@  static int seq_show(struct seq_file *m, void *v)
 		   (long long)file->f_pos, f_flags,
 		   real_mount(file->f_path.mnt)->mnt_id);
 
+	spin_lock(&file->f_lock);
+	if (file->f_description)
+		seq_printf(m, "desc:\t%s\n", file->f_description);
+	spin_unlock(&file->f_lock);
+
 	show_fd_locks(m, file, files);
 	if (seq_has_overflowed(m))
 		goto out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f5abba86107d..09717bfa4e3b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -980,6 +980,9 @@  struct file {
 	struct address_space	*f_mapping;
 	errseq_t		f_wb_err;
 	errseq_t		f_sb_err; /* for syncfs */
+
+#define MAX_FILE_DESC_SIZE 256
+	char                    *f_description;
 } __randomize_layout
   __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
 
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 2f86b2ad6d7e..f86ff6dc45c7 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -55,6 +55,11 @@ 
 #define F_GET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 13)
 #define F_SET_FILE_RW_HINT	(F_LINUX_SPECIFIC_BASE + 14)
 
+/*
+ * Attach a free-form description string to a file (shown in its fdinfo)
+ */
+#define F_SET_DESCRIPTION	(F_LINUX_SPECIFIC_BASE + 15)
+
 /*
  * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
  * used to clear any hints previously set.