diff mbox series

[03/24] net: add a new sockptr_t type

Message ID 20200720124737.118617-4-hch@lst.de (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series [01/24] bpfilter: reject kernel addresses | expand

Commit Message

Christoph Hellwig July 20, 2020, 12:47 p.m. UTC
Add a uptr_t type that can hold a pointer to either a user or kernel
memory region, and simply helpers to copy to and from it.  For
architectures like x86 that have non-overlapping user and kernel
address space it just is a union and uses a TASK_SIZE check to
select the proper copy routine.  For architectures with overlapping
address spaces a flag to indicate the address space is used instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/sockptr.h | 121 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 include/linux/sockptr.h

Comments

Eric Biggers July 20, 2020, 4:37 p.m. UTC | #1
On Mon, Jul 20, 2020 at 02:47:16PM +0200, Christoph Hellwig wrote:
> Add a uptr_t type that can hold a pointer to either a user or kernel
> memory region, and simply helpers to copy to and from it.  For
> architectures like x86 that have non-overlapping user and kernel
> address space it just is a union and uses a TASK_SIZE check to
> select the proper copy routine.  For architectures with overlapping
> address spaces a flag to indicate the address space is used instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/sockptr.h | 121 ++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 121 insertions(+)
>  create mode 100644 include/linux/sockptr.h
> 
> diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
> new file mode 100644
> index 00000000000000..e41dfa52555dec
> --- /dev/null
> +++ b/include/linux/sockptr.h
> @@ -0,0 +1,121 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2020 Christoph Hellwig.
> + *
> + * Support for "universal" pointers that can point to either kernel or userspace
> + * memory.
> + */
> +#ifndef _LINUX_SOCKPTR_H
> +#define _LINUX_SOCKPTR_H
> +
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +
> +#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> +typedef union {
> +	void		*kernel;
> +	void __user	*user;
> +} sockptr_t;
> +
> +static inline bool sockptr_is_kernel(sockptr_t sockptr)
> +{
> +	return (unsigned long)sockptr.kernel >= TASK_SIZE;
> +}
> +
> +static inline sockptr_t KERNEL_SOCKPTR(void *p)
> +{
> +	return (sockptr_t) { .kernel = p };
> +}
> +#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
> +typedef struct {
> +	union {
> +		void		*kernel;
> +		void __user	*user;
> +	};
> +	bool		is_kernel : 1;
> +} sockptr_t;
> +
> +static inline bool sockptr_is_kernel(sockptr_t sockptr)
> +{
> +	return sockptr.is_kernel;
> +}
> +
> +static inline sockptr_t KERNEL_SOCKPTR(void *p)
> +{
> +	return (sockptr_t) { .kernel = p, .is_kernel = true };
> +}
> +#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
> +
> +static inline sockptr_t USER_SOCKPTR(void __user *p)
> +{
> +	return (sockptr_t) { .user = p };
> +}
> +
> +static inline bool sockptr_is_null(sockptr_t sockptr)
> +{
> +	return !sockptr.user && !sockptr.kernel;
> +}
> +
> +static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
> +{
> +	if (!sockptr_is_kernel(src))
> +		return copy_from_user(dst, src.user, size);
> +	memcpy(dst, src.kernel, size);
> +	return 0;
> +}

How does this not introduce a massive security hole when
CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE?

AFAICS, userspace can pass in a pointer >= TASK_SIZE,
and this code makes it be treated as a kernel pointer.

- Eric
Christoph Hellwig July 20, 2020, 5:43 p.m. UTC | #2
On Mon, Jul 20, 2020 at 09:37:48AM -0700, Eric Biggers wrote:
> How does this not introduce a massive security hole when
> CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE?
> 
> AFAICS, userspace can pass in a pointer >= TASK_SIZE,
> and this code makes it be treated as a kernel pointer.

Yeah, we'll need to validate that before initializing the pointer.

But thinking this a little further:  doesn't this mean any
set_fs(KERNEL_DS) that has other user pointers than the one it is
intended for has the same issue?  Pretty much all of these are gone
in mainline now, but in older stable kernels there might be some
interesting cases, especially in the compat ioctl handlers.
Eric Biggers July 20, 2020, 5:55 p.m. UTC | #3
On Mon, Jul 20, 2020 at 07:43:22PM +0200, Christoph Hellwig wrote:
> On Mon, Jul 20, 2020 at 09:37:48AM -0700, Eric Biggers wrote:
> > How does this not introduce a massive security hole when
> > CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE?
> > 
> > AFAICS, userspace can pass in a pointer >= TASK_SIZE,
> > and this code makes it be treated as a kernel pointer.
> 
> Yeah, we'll need to validate that before initializing the pointer.
> 
> But thinking this a little further:  doesn't this mean any
> set_fs(KERNEL_DS) that has other user pointers than the one it is
> intended for has the same issue?  Pretty much all of these are gone
> in mainline now, but in older stable kernels there might be some
> interesting cases, especially in the compat ioctl handlers.

Yes.  I thought that eliminating that class of bug is one of the main
motivations for your "remove set_fs" work.  See commit 128394eff343
("sg_write()/bsg_write() is not fit to be called under KERNEL_DS") for a case
where this type of bug was fixed.

Are you aware of any specific cases that weren't already fixed?  If there are
any, they need to be urgently fixed.

- Eric
David Laight July 21, 2020, 9:55 a.m. UTC | #4
From: Eric Biggers
> Sent: 20 July 2020 17:38
...
> How does this not introduce a massive security hole when
> CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE?
> 
> AFAICS, userspace can pass in a pointer >= TASK_SIZE,
> and this code makes it be treated as a kernel pointer.

One thought I've had is that on 64-bit architectures there
is almost always some part of the KVA that can never be valid
and is larger than the maximum size of a user VA.

If the user address is offset into this invalid area
then it can always be distinguished from a kernel address.

Indeed it may be worth considering offsetting kernel
addresses as well.

This forces code to use the correct accessors.

It doesn't solve the problem for 32bit systems with
CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE since
they are likely to have all 32bit addresses available
to both use and kernel.

If you end up with a 'fat pointer' then it may be worth
adding the length and making it a 'buffer descriptor'.
This can then be passed by address and the reduced
number of parameters will probably offset the cost
of the extra indirection.

The read/write functions could then take the 'buffer descriptor',
offset and length as parameters.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
David Laight July 21, 2020, 10:14 a.m. UTC | #5
From: Christoph Hellwig
> Sent: 20 July 2020 13:47
> 
> Add a uptr_t type that can hold a pointer to either a user or kernel
> memory region, and simply helpers to copy to and from it.  For
> architectures like x86 that have non-overlapping user and kernel
> address space it just is a union and uses a TASK_SIZE check to
> select the proper copy routine.  For architectures with overlapping
> address spaces a flag to indicate the address space is used instead.
> 
...
> +#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
> +typedef struct {
> +	union {
> +		void		*kernel;
> +		void __user	*user;
> +	};
> +	bool		is_kernel : 1;
> +} sockptr_t;

If you need to do that you might as well make it a struct
where either the kernel or user address is defined.
Far safer for all architectures.

Indeed you could add the length (to save passing an
extra parameter through the layers).

The system call code could even copy the code into a
kernel buffer (setting both pointers).
So that code that didn't need to access beyond the end
of the implied buffer (most of it) could just access the
kernel buffer.

For getsockopt() you'd need some way of supressing the
'default' copy back of the user buffer.

This would also allow some of the sctp getsockopt to
read (usually 4 bytes) from the 'user' buffer without
the wrapper code always having to read in the entire
user buffer.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Christoph Hellwig July 22, 2020, 7:56 a.m. UTC | #6
On Mon, Jul 20, 2020 at 10:55:43AM -0700, Eric Biggers wrote:
> On Mon, Jul 20, 2020 at 07:43:22PM +0200, Christoph Hellwig wrote:
> > On Mon, Jul 20, 2020 at 09:37:48AM -0700, Eric Biggers wrote:
> > > How does this not introduce a massive security hole when
> > > CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE?
> > > 
> > > AFAICS, userspace can pass in a pointer >= TASK_SIZE,
> > > and this code makes it be treated as a kernel pointer.
> > 
> > Yeah, we'll need to validate that before initializing the pointer.
> > 
> > But thinking this a little further:  doesn't this mean any
> > set_fs(KERNEL_DS) that has other user pointers than the one it is
> > intended for has the same issue?  Pretty much all of these are gone
> > in mainline now, but in older stable kernels there might be some
> > interesting cases, especially in the compat ioctl handlers.
> 
> Yes.  I thought that eliminating that class of bug is one of the main
> motivations for your "remove set_fs" work.  See commit 128394eff343
> ("sg_write()/bsg_write() is not fit to be called under KERNEL_DS") for a case
> where this type of bug was fixed.
> 
> Are you aware of any specific cases that weren't already fixed?  If there are
> any, they need to be urgently fixed.

current mainline has almost no set_fs left, and setsockopt seems
pretty much safe.  But if we go back a long term stable release or two
I bet I'd find one or two.
diff mbox series

Patch

diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
new file mode 100644
index 00000000000000..e41dfa52555dec
--- /dev/null
+++ b/include/linux/sockptr.h
@@ -0,0 +1,121 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020 Christoph Hellwig.
+ *
+ * Support for "universal" pointers that can point to either kernel or userspace
+ * memory.
+ */
+#ifndef _LINUX_SOCKPTR_H
+#define _LINUX_SOCKPTR_H
+
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+typedef union {
+	void		*kernel;
+	void __user	*user;
+} sockptr_t;
+
+static inline bool sockptr_is_kernel(sockptr_t sockptr)
+{
+	return (unsigned long)sockptr.kernel >= TASK_SIZE;
+}
+
+static inline sockptr_t KERNEL_SOCKPTR(void *p)
+{
+	return (sockptr_t) { .kernel = p };
+}
+#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
+typedef struct {
+	union {
+		void		*kernel;
+		void __user	*user;
+	};
+	bool		is_kernel : 1;
+} sockptr_t;
+
+static inline bool sockptr_is_kernel(sockptr_t sockptr)
+{
+	return sockptr.is_kernel;
+}
+
+static inline sockptr_t KERNEL_SOCKPTR(void *p)
+{
+	return (sockptr_t) { .kernel = p, .is_kernel = true };
+}
+#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
+
+static inline sockptr_t USER_SOCKPTR(void __user *p)
+{
+	return (sockptr_t) { .user = p };
+}
+
+static inline bool sockptr_is_null(sockptr_t sockptr)
+{
+	return !sockptr.user && !sockptr.kernel;
+}
+
+static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
+{
+	if (!sockptr_is_kernel(src))
+		return copy_from_user(dst, src.user, size);
+	memcpy(dst, src.kernel, size);
+	return 0;
+}
+
+static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size)
+{
+	if (!sockptr_is_kernel(dst))
+		return copy_to_user(dst.user, src, size);
+	memcpy(dst.kernel, src, size);
+	return 0;
+}
+
+static inline void *memdup_sockptr(sockptr_t src, size_t len)
+{
+	void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN);
+
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+	if (copy_from_sockptr(p, src, len)) {
+		kfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	return p;
+}
+
+static inline void *memdup_sockptr_nul(sockptr_t src, size_t len)
+{
+	char *p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+	if (copy_from_sockptr(p, src, len)) {
+		kfree(p);
+		return ERR_PTR(-EFAULT);
+	}
+	p[len] = '\0';
+	return p;
+}
+
+static inline void sockptr_advance(sockptr_t sockptr, size_t len)
+{
+	if (sockptr_is_kernel(sockptr))
+		sockptr.kernel += len;
+	else
+		sockptr.user += len;
+}
+
+static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count)
+{
+	if (sockptr_is_kernel(src)) {
+		size_t len = min(strnlen(src.kernel, count - 1) + 1, count);
+
+		memcpy(dst, src.kernel, len);
+		return len;
+	}
+	return strncpy_from_user(dst, src.user, count);
+}
+
+#endif /* _LINUX_SOCKPTR_H */