diff mbox series

[1/3] userfaultfd/sysctl: introduce unprivileged_userfaultfd

Message ID 20190311093701.15734-2-peterx@redhat.com (mailing list archive)
State New, archived
Headers show
Series userfaultfd: allow to forbid unprivileged users | expand

Commit Message

Peter Xu March 11, 2019, 9:36 a.m. UTC
Introduce a new sysctl called "vm.unprivileged_userfaultfd" that can
be used to decide whether userfaultfd syscalls are allowed by
unprivileged users.  It'll allow three modes:

  - disabled: disallow unprivileged users to use uffd

  - enabled:  allow unprivileged users to use uffd

  - kvm:      allow unprivileged users to use uffd only if the user
              had enough permission to open /dev/kvm (this option only
              exists if the kernel turned on KVM).

This patch only introduces the new interface but does not yet apply it to
the userfaultfd syscalls; that will be done in the follow-up patch.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 fs/userfaultfd.c              | 96 +++++++++++++++++++++++++++++++++++
 include/linux/userfaultfd_k.h |  5 ++
 init/Kconfig                  | 11 ++++
 kernel/sysctl.c               | 11 ++++
 4 files changed, 123 insertions(+)

Comments

Mike Rapoport March 12, 2019, 6:58 a.m. UTC | #1
On Mon, Mar 11, 2019 at 05:36:59PM +0800, Peter Xu wrote:
> Introduce a new sysctl called "vm.unprivileged_userfaultfd" that can
> be used to decide whether userfaultfd syscalls are allowed by
> unprivileged users.  It'll allow three modes:
> 
>   - disabled: disallow unprivileged users to use uffd
> 
>   - enabled:  allow unprivileged users to use uffd
> 
>   - kvm:      allow unprivileged users to use uffd only if the user
>               had enough permission to open /dev/kvm (this option only
>               exists if the kernel turned on KVM).
> 
> This patch only introduce the new interface but not yet applied it to
> the userfaultfd syscalls, which will be done in the follow up patch.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  fs/userfaultfd.c              | 96 +++++++++++++++++++++++++++++++++++
>  include/linux/userfaultfd_k.h |  5 ++
>  init/Kconfig                  | 11 ++++
>  kernel/sysctl.c               | 11 ++++
>  4 files changed, 123 insertions(+)
> 
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 89800fc7dc9d..c2188464555a 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -29,6 +29,8 @@
>  #include <linux/ioctl.h>
>  #include <linux/security.h>
>  #include <linux/hugetlb.h>
> +#include <linux/sysctl.h>
> +#include <linux/string.h>
> 
>  static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
> 
> @@ -93,6 +95,95 @@ struct userfaultfd_wake_range {
>  	unsigned long len;
>  };
> 
> +enum unprivileged_userfaultfd {
> +	/* Disallow unprivileged users to use userfaultfd syscalls */
> +	UFFD_UNPRIV_DISABLED = 0,
> +	/* Allow unprivileged users to use userfaultfd syscalls */
> +	UFFD_UNPRIV_ENABLED,
> +#if IS_ENABLED(CONFIG_KVM)
> +	/*
> +	 * Allow unprivileged users to use userfaultfd syscalls only
> +	 * if the user had enough permission to open /dev/kvm
> +	 */
> +	UFFD_UNPRIV_KVM,
> +#endif
> +	UFFD_UNPRIV_NUM,
> +};
> +
> +static int unprivileged_userfaultfd __read_mostly;
> +static const char *unprivileged_userfaultfd_str[UFFD_UNPRIV_NUM] = {
> +	"disabled", "enabled",
> +#if IS_ENABLED(CONFIG_KVM)
> +	"kvm",
> +#endif
> +};
> +
> +static int unprivileged_uffd_parse(char *buf, size_t size)
> +{
> +	int i;
> +
> +	for (i = 0; i < UFFD_UNPRIV_NUM; i++) {
> +		if (!strncmp(unprivileged_userfaultfd_str[i], buf, size)) {
> +			unprivileged_userfaultfd = i;
> +			return 0;
> +		}
> +	}
> +
> +	return -EFAULT;
> +}
> +
> +static void unprivileged_uffd_dump(char *buf, size_t size)
> +{
> +	int i;
> +
> +	*buf = 0x00;
> +	for (i = 0; i < UFFD_UNPRIV_NUM; i++) {
> +		if (i == unprivileged_userfaultfd)
> +			strncat(buf, "[", size - strlen(buf));
> +		strncat(buf, unprivileged_userfaultfd_str[i],
> +			size - strlen(buf));
> +		if (i == unprivileged_userfaultfd)
> +			strncat(buf, "]", size - strlen(buf));
> +		strncat(buf, " ", size - strlen(buf));
> +	}
> +
> +}
> +
> +int proc_unprivileged_userfaultfd(struct ctl_table *table, int write,
> +				  void __user *buffer, size_t *lenp,
> +				  loff_t *ppos)
> +{
> +	struct ctl_table tmp_table = { .maxlen = 0 };
> +	int ret;
> +
> +	if (write) {
> +		tmp_table.maxlen = UFFD_UNPRIV_STRLEN;
> +		tmp_table.data = kmalloc(UFFD_UNPRIV_STRLEN, GFP_KERNEL);
> +
> +		ret = proc_dostring(&tmp_table, write, buffer, lenp, ppos);
> +		if (ret)
> +			goto out;
> +
> +		ret = unprivileged_uffd_parse(tmp_table.data,
> +					      UFFD_UNPRIV_STRLEN);
> +	} else {
> +		/* Leave space for "[]" */
> +		int len = UFFD_UNPRIV_STRLEN * UFFD_UNPRIV_NUM + 2;
> +
> +		tmp_table.maxlen = len;
> +		tmp_table.data = kmalloc(len, GFP_KERNEL);
> +
> +		unprivileged_uffd_dump(tmp_table.data, len);
> +
> +		ret = proc_dostring(&tmp_table, write, buffer, lenp, ppos);
> +	}
> +
> +out:
> +	if (tmp_table.data)
> +		kfree(tmp_table.data);
> +	return ret;
> +}
> +
>  static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
>  				     int wake_flags, void *key)
>  {
> @@ -1955,6 +2046,11 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
> 
>  static int __init userfaultfd_init(void)
>  {
> +	char unpriv_uffd[UFFD_UNPRIV_STRLEN] =
> +	    CONFIG_USERFAULTFD_UNPRIVILEGED_DEFAULT;
> +
> +	unprivileged_uffd_parse(unpriv_uffd, sizeof(unpriv_uffd));
> +
>  	userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache",
>  						sizeof(struct userfaultfd_ctx),
>  						0,
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index 37c9eba75c98..f53bc02ccffc 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -28,6 +28,11 @@
>  #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
>  #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
> 
> +#define UFFD_UNPRIV_STRLEN 16
> +int proc_unprivileged_userfaultfd(struct ctl_table *table, int write,
> +				  void __user *buffer, size_t *lenp,
> +				  loff_t *ppos);
> +
>  extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
> 
>  extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
> diff --git a/init/Kconfig b/init/Kconfig
> index c9386a365eea..d90caa4fed17 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1512,6 +1512,17 @@ config USERFAULTFD
>  	  Enable the userfaultfd() system call that allows to intercept and
>  	  handle page faults in userland.
> 
> +config USERFAULTFD_UNPRIVILEGED_DEFAULT
> +        string "Default behavior for unprivileged userfault syscalls"
> +        depends on USERFAULTFD
> +        default "disabled"
> +        help
> +          Set this to "enabled" to allow userfaultfd syscalls from
> +          unprivileged users.  Set this to "disabled" to forbid
> +          userfaultfd syscalls from unprivileged users.  Set this to
> +          "kvm" to forbid unpriviledged users but still allow users
> +          who had enough permission to open /dev/kvm.

I'd phrase it a bit differently:

This option controls privilege level required to execute userfaultfd
system call.

Set this to "enabled" to allow userfaultfd system call from unprivileged
users. 
Set this to "disabled" to allow userfaultfd system call only for users who
have ptrace capability.
Set this to "kvm" to restrict userfaultfd system call usage to users with
permissions to open "/dev/kvm".
 
> +
>  config ARCH_HAS_MEMBARRIER_CALLBACKS
>  	bool
> 
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 7578e21a711b..5dc9f3d283dd 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -96,6 +96,9 @@
>  #ifdef CONFIG_LOCKUP_DETECTOR
>  #include <linux/nmi.h>
>  #endif
> +#ifdef CONFIG_USERFAULTFD
> +#include <linux/userfaultfd_k.h>
> +#endif
> 
>  #if defined(CONFIG_SYSCTL)
> 
> @@ -1704,6 +1707,14 @@ static struct ctl_table vm_table[] = {
>  		.extra1		= (void *)&mmap_rnd_compat_bits_min,
>  		.extra2		= (void *)&mmap_rnd_compat_bits_max,
>  	},
> +#endif
> +#ifdef CONFIG_USERFAULTFD
> +	{
> +		.procname	= "unprivileged_userfaultfd",
> +		.maxlen		= UFFD_UNPRIV_STRLEN,
> +		.mode		= 0644,
> +		.proc_handler	= proc_unprivileged_userfaultfd,
> +	},
>  #endif
>  	{ }
>  };
> -- 
> 2.17.1
>
Peter Xu March 12, 2019, 12:26 p.m. UTC | #2
On Tue, Mar 12, 2019 at 08:58:30AM +0200, Mike Rapoport wrote:

[...]

> > +config USERFAULTFD_UNPRIVILEGED_DEFAULT
> > +        string "Default behavior for unprivileged userfault syscalls"
> > +        depends on USERFAULTFD
> > +        default "disabled"
> > +        help
> > +          Set this to "enabled" to allow userfaultfd syscalls from
> > +          unprivileged users.  Set this to "disabled" to forbid
> > +          userfaultfd syscalls from unprivileged users.  Set this to
> > +          "kvm" to forbid unpriviledged users but still allow users
> > +          who had enough permission to open /dev/kvm.
> 
> I'd phrase it a bit differently:
> 
> This option controls privilege level required to execute userfaultfd
                      ^
                      +---- add " the default"?

> system call.
> 
> Set this to "enabled" to allow userfaultfd system call from unprivileged
> users. 
> Set this to "disabled" to allow userfaultfd system call only for users who
> have ptrace capability.
> Set this to "kvm" to restrict userfaultfd system call usage to users with
                                                                      ^
                         add " who have ptrace capability, or" -------+

> permissions to open "/dev/kvm".

I think your version is better than mine, but I'd like to confirm the
above two extra changes before I squash them into the patch. :)

Thanks!
Mike Rapoport March 12, 2019, 1:53 p.m. UTC | #3
On Tue, Mar 12, 2019 at 08:26:33PM +0800, Peter Xu wrote:
> On Tue, Mar 12, 2019 at 08:58:30AM +0200, Mike Rapoport wrote:
> 
> [...]
> 
> > > +config USERFAULTFD_UNPRIVILEGED_DEFAULT
> > > +        string "Default behavior for unprivileged userfault syscalls"
> > > +        depends on USERFAULTFD
> > > +        default "disabled"
> > > +        help
> > > +          Set this to "enabled" to allow userfaultfd syscalls from
> > > +          unprivileged users.  Set this to "disabled" to forbid
> > > +          userfaultfd syscalls from unprivileged users.  Set this to
> > > +          "kvm" to forbid unpriviledged users but still allow users
> > > +          who had enough permission to open /dev/kvm.
> > 
> > I'd phrase it a bit differently:
> > 
> > This option controls privilege level required to execute userfaultfd
>                       ^
>                       +---- add " the default"?
> 
> > system call.
> > 
> > Set this to "enabled" to allow userfaultfd system call from unprivileged
> > users. 
> > Set this to "disabled" to allow userfaultfd system call only for users who
> > have ptrace capability.
> > Set this to "kvm" to restrict userfaultfd system call usage to users with
>                                                                       ^
>                          add " who have ptrace capability, or" -------+
> 
> > permissions to open "/dev/kvm".
> 
> I think your version is better than mine, but I'd like to confirm
> about above two extra changes before I squash them into the patch. :)

I like your changes.
 
> Thanks!
> 
> -- 
> Peter Xu
>
diff mbox series

Patch

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 89800fc7dc9d..c2188464555a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -29,6 +29,8 @@ 
 #include <linux/ioctl.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/sysctl.h>
+#include <linux/string.h>
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
@@ -93,6 +95,95 @@  struct userfaultfd_wake_range {
 	unsigned long len;
 };
 
+/*
+ * Modes selectable for unprivileged use of the userfaultfd syscalls.
+ * The numeric value of each mode is also its index into
+ * unprivileged_userfaultfd_str[], so the two must be kept in the same
+ * order.
+ */
+enum unprivileged_userfaultfd {
+	/* Disallow unprivileged users to use userfaultfd syscalls */
+	UFFD_UNPRIV_DISABLED = 0,
+	/* Allow unprivileged users to use userfaultfd syscalls */
+	UFFD_UNPRIV_ENABLED,
+#if IS_ENABLED(CONFIG_KVM)
+	/*
+	 * Allow unprivileged users to use userfaultfd syscalls only
+	 * if the user had enough permission to open /dev/kvm
+	 */
+	UFFD_UNPRIV_KVM,
+#endif
+	/* Number of modes; not a mode itself */
+	UFFD_UNPRIV_NUM,
+};
+
+/* Currently selected mode (one of enum unprivileged_userfaultfd) */
+static int unprivileged_userfaultfd __read_mostly;
+
+/*
+ * User-visible name of each mode, indexed by enum value.  Both the
+ * strings and the pointers are read-only.
+ */
+static const char * const unprivileged_userfaultfd_str[UFFD_UNPRIV_NUM] = {
+	"disabled", "enabled",
+#if IS_ENABLED(CONFIG_KVM)
+	"kvm",
+#endif
+};
+
+/*
+ * Look up a mode name in unprivileged_userfaultfd_str[] and, when it
+ * matches, make that mode the current one.
+ *
+ * @buf:  mode name to look up
+ * @size: upper bound on the number of bytes of @buf that are compared
+ *
+ * Returns 0 on success, or -EINVAL if @buf names no known mode (the
+ * current mode is left untouched in that case).
+ */
+static int unprivileged_uffd_parse(const char *buf, size_t size)
+{
+	int i;
+
+	for (i = 0; i < UFFD_UNPRIV_NUM; i++) {
+		if (!strncmp(unprivileged_userfaultfd_str[i], buf, size)) {
+			unprivileged_userfaultfd = i;
+			return 0;
+		}
+	}
+
+	/* An unknown keyword is an invalid value, not a bad address */
+	return -EINVAL;
+}
+
+/*
+ * Format all known mode names into @buf, separated by spaces, with the
+ * currently selected mode wrapped in "[]" (each name, including the
+ * last, is followed by one space).
+ *
+ * @buf:  destination buffer
+ * @size: total size of @buf in bytes, including room for the NUL
+ *
+ * Output that does not fit is silently truncated; @buf is always
+ * NUL-terminated when @size is non-zero.
+ */
+static void unprivileged_uffd_dump(char *buf, size_t size)
+{
+	int i;
+
+	if (!size)
+		return;
+
+	*buf = 0x00;
+	for (i = 0; i < UFFD_UNPRIV_NUM; i++) {
+		/*
+		 * strncat() appends up to n characters and then a NUL, so
+		 * one byte must be held back for the terminator.
+		 */
+		if (i == unprivileged_userfaultfd)
+			strncat(buf, "[", size - strlen(buf) - 1);
+		strncat(buf, unprivileged_userfaultfd_str[i],
+			size - strlen(buf) - 1);
+		if (i == unprivileged_userfaultfd)
+			strncat(buf, "]", size - strlen(buf) - 1);
+		strncat(buf, " ", size - strlen(buf) - 1);
+	}
+}
+
+/*
+ * sysctl handler for "unprivileged_userfaultfd".
+ *
+ * On write, the user-supplied string is copied into a temporary buffer
+ * and parsed as a mode name.  On read, the list of known modes (current
+ * one in "[]") is formatted into a temporary buffer and handed to
+ * proc_dostring().  A private ctl_table is used in both directions so
+ * that proc_dostring() operates on the temporary buffer.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary buffer cannot be
+ * allocated, or the error reported by proc_dostring() or
+ * unprivileged_uffd_parse().
+ */
+int proc_unprivileged_userfaultfd(struct ctl_table *table, int write,
+				  void __user *buffer, size_t *lenp,
+				  loff_t *ppos)
+{
+	struct ctl_table tmp_table = { .maxlen = 0 };
+	int ret;
+
+	if (write) {
+		tmp_table.maxlen = UFFD_UNPRIV_STRLEN;
+		/*
+		 * Zeroed so that the buffer holds a valid (empty) string
+		 * even if proc_dostring() reports success without storing
+		 * anything into it.
+		 */
+		tmp_table.data = kzalloc(UFFD_UNPRIV_STRLEN, GFP_KERNEL);
+		if (!tmp_table.data)
+			return -ENOMEM;
+
+		ret = proc_dostring(&tmp_table, write, buffer, lenp, ppos);
+		if (ret)
+			goto out;
+
+		ret = unprivileged_uffd_parse(tmp_table.data,
+					      UFFD_UNPRIV_STRLEN);
+	} else {
+		/* Leave space for "[]" */
+		int len = UFFD_UNPRIV_STRLEN * UFFD_UNPRIV_NUM + 2;
+
+		tmp_table.maxlen = len;
+		tmp_table.data = kmalloc(len, GFP_KERNEL);
+		if (!tmp_table.data)
+			return -ENOMEM;
+
+		unprivileged_uffd_dump(tmp_table.data, len);
+
+		ret = proc_dostring(&tmp_table, write, buffer, lenp, ppos);
+	}
+
+out:
+	kfree(tmp_table.data);
+	return ret;
+}
+
 static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode,
 				     int wake_flags, void *key)
 {
@@ -1955,6 +2046,11 @@  SYSCALL_DEFINE1(userfaultfd, int, flags)
 
 static int __init userfaultfd_init(void)
 {
+	char unpriv_uffd[UFFD_UNPRIV_STRLEN] =
+	    CONFIG_USERFAULTFD_UNPRIVILEGED_DEFAULT;
+
+	unprivileged_uffd_parse(unpriv_uffd, sizeof(unpriv_uffd));
+
 	userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache",
 						sizeof(struct userfaultfd_ctx),
 						0,
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 37c9eba75c98..f53bc02ccffc 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -28,6 +28,11 @@ 
 #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
 
+#define UFFD_UNPRIV_STRLEN 16
+int proc_unprivileged_userfaultfd(struct ctl_table *table, int write,
+				  void __user *buffer, size_t *lenp,
+				  loff_t *ppos);
+
 extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
diff --git a/init/Kconfig b/init/Kconfig
index c9386a365eea..d90caa4fed17 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1512,6 +1512,17 @@  config USERFAULTFD
 	  Enable the userfaultfd() system call that allows to intercept and
 	  handle page faults in userland.
 
+config USERFAULTFD_UNPRIVILEGED_DEFAULT
+        string "Default behavior for unprivileged userfault syscalls"
+        depends on USERFAULTFD
+        default "disabled"
+        help
+          Set this to "enabled" to allow userfaultfd syscalls from
+          unprivileged users.  Set this to "disabled" to forbid
+          userfaultfd syscalls from unprivileged users.  Set this to
+          "kvm" to forbid unprivileged users but still allow users
+          who have enough permission to open /dev/kvm.
+
 config ARCH_HAS_MEMBARRIER_CALLBACKS
 	bool
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7578e21a711b..5dc9f3d283dd 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -96,6 +96,9 @@ 
 #ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
+#ifdef CONFIG_USERFAULTFD
+#include <linux/userfaultfd_k.h>
+#endif
 
 #if defined(CONFIG_SYSCTL)
 
@@ -1704,6 +1707,14 @@  static struct ctl_table vm_table[] = {
 		.extra1		= (void *)&mmap_rnd_compat_bits_min,
 		.extra2		= (void *)&mmap_rnd_compat_bits_max,
 	},
+#endif
+#ifdef CONFIG_USERFAULTFD
+	{
+		.procname	= "unprivileged_userfaultfd",
+		.maxlen		= UFFD_UNPRIV_STRLEN,
+		.mode		= 0644,
+		.proc_handler	= proc_unprivileged_userfaultfd,
+	},
 #endif
 	{ }
 };