[v2,1/1] mm, kasan, kmsan: copy_from/to_kernel_nofault

Message ID 20241005164813.2475778-2-snovitoll@gmail.com (mailing list archive)
State New
Series [v2,1/1] mm, kasan, kmsan: copy_from/to_kernel_nofault

Commit Message

Sabyrzhan Tasbolatov Oct. 5, 2024, 4:48 p.m. UTC
Instrument copy_from_kernel_nofault() with KMSAN to check for copying
of uninitialized kernel memory, and copy_to_kernel_nofault() with KASAN
and KCSAN to detect memory corruption.

syzbot reported that the bpf_probe_read_kernel() helper triggered a
KASAN report via kasan_check_range(), which is not the expected
behaviour, as copy_from_kernel_nofault() is meant to be a non-faulting
helper.
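
For reference, the helper reaches copy_from_kernel_nofault() roughly as
follows (a simplified sketch along the lines of kernel/trace/bpf_trace.c;
the exact code differs across kernel versions):

static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
{
	int ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);

	/*
	 * unsafe_ptr may point at arbitrary kernel memory, so sanitizer
	 * checks against the source are false positives on this path.
	 */
	if (unlikely(ret < 0))
		memset(dst, 0, size);	/* don't leak stale dst contents */
	return ret;
}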

The solution, suggested by Marco Elver, is to replace the KASAN and
KCSAN checks in copy_from_kernel_nofault() with KMSAN detection of
copying uninitialized kernel memory. In copy_to_kernel_nofault() we can
retain instrument_write() for memory corruption instrumentation, placed
before pagefault_disable().

copy_to_kernel_nofault() is tested on x86_64 and arm64 with
CONFIG_KASAN_SW_TAGS. On arm64 with CONFIG_KASAN_HW_TAGS, the KUnit
test currently fails and needs further clarification; for now it is
disabled in the KUnit test.

Link: https://lore.kernel.org/linux-mm/CANpmjNMAVFzqnCZhEity9cjiqQ9CVN1X7qeeeAp_6yKjwKo8iw@mail.gmail.com/
Suggested-by: Marco Elver <elver@google.com>
Reported-by: syzbot+61123a5daeb9f7454599@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=61123a5daeb9f7454599
Reported-by: Andrey Konovalov <andreyknvl@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=210505
Signed-off-by: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
---
v2:
	- squashed the 2 patches previously submitted to the -mm tree into one, based on Linus' tree
---
 mm/kasan/kasan_test_c.c | 27 +++++++++++++++++++++++++++
 mm/kmsan/kmsan_test.c   | 17 +++++++++++++++++
 mm/maccess.c            |  7 +++++--
 3 files changed, 49 insertions(+), 2 deletions(-)

Comments

Marco Elver Oct. 8, 2024, 8:31 a.m. UTC | #1
On Sat, Oct 05, 2024 at 09:48PM +0500, Sabyrzhan Tasbolatov wrote:
> Instrument copy_from_kernel_nofault() with KMSAN to check for copying
> of uninitialized kernel memory, and copy_to_kernel_nofault() with KASAN
> and KCSAN to detect memory corruption.
>
> syzbot reported that the bpf_probe_read_kernel() helper triggered a
> KASAN report via kasan_check_range(), which is not the expected
> behaviour, as copy_from_kernel_nofault() is meant to be a non-faulting
> helper.
>
> The solution, suggested by Marco Elver, is to replace the KASAN and
> KCSAN checks in copy_from_kernel_nofault() with KMSAN detection of
> copying uninitialized kernel memory. In copy_to_kernel_nofault() we can
> retain instrument_write() for memory corruption instrumentation, placed
> before pagefault_disable().

I don't understand why it has to be before the whole copy i.e. before
pagefault_disable()?

I think my suggestion was to only check the memory where no fault
occurred. See below.

> diff --git a/mm/maccess.c b/mm/maccess.c
> index 518a25667323..a91a39a56cfd 100644
> --- a/mm/maccess.c
> +++ b/mm/maccess.c
> @@ -15,7 +15,7 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
>  
>  #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
>  	while (len >= sizeof(type)) {					\
> -		__get_kernel_nofault(dst, src, type, err_label);		\
> +		__get_kernel_nofault(dst, src, type, err_label);	\
>  		dst += sizeof(type);					\
>  		src += sizeof(type);					\
>  		len -= sizeof(type);					\
> @@ -31,6 +31,8 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
>  	if (!copy_from_kernel_nofault_allowed(src, size))
>  		return -ERANGE;
>  
> +	/* Make sure uninitialized kernel memory isn't copied. */
> +	kmsan_check_memory(src, size);
>  	pagefault_disable();
>  	if (!(align & 7))
>  		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
> @@ -49,7 +51,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
>  
>  #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
>  	while (len >= sizeof(type)) {					\
> -		__put_kernel_nofault(dst, src, type, err_label);		\
> +		__put_kernel_nofault(dst, src, type, err_label);	\
>  		dst += sizeof(type);					\
>  		src += sizeof(type);					\
>  		len -= sizeof(type);					\
> @@ -62,6 +64,7 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
>  	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
>  		align = (unsigned long)dst | (unsigned long)src;
>  
> +	instrument_write(dst, size);
>  	pagefault_disable();

So this will check the whole range before the access. But if the copy
aborts because of a fault, then we may still end up with false
positives.

Why not something like the below - normally we check the accesses
before, but these are debug kernels anyway, so I see no harm in making
an exception in this case and checking the memory if there was no fault
i.e. it didn't jump to err_label yet. It's also slower because of
repeated calls, but these helpers aren't frequently used.

The alternative is to do the sanitizer check after the entire copy if we
know there was no fault at all. But that may still hide real bugs if
e.g. it starts copying some partial memory and then accesses an
unfaulted page.
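
Roughly, that rejected alternative would look like this (untested
sketch, reduced to the byte loop only):

long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
{
	const void *orig_src = src;
	size_t orig_size = size;

	if (!copy_from_kernel_nofault_allowed(src, size))
		return -ERANGE;

	pagefault_disable();
	copy_from_kernel_nofault_loop(dst, src, size, u8, Efault);
	pagefault_enable();
	/* Only reached when the whole copy succeeded. */
	kmsan_check_memory(orig_src, orig_size);
	return 0;
Efault:
	pagefault_enable();
	/* Bytes copied before the fault would go unchecked here. */
	return -EFAULT;
}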


diff --git a/mm/maccess.c b/mm/maccess.c
index a91a39a56cfd..3ca55ec63a6a 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -13,9 +13,14 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
 	return true;
 }
 
+/*
+ * The below only uses kmsan_check_memory() to ensure uninitialized kernel
+ * memory isn't leaked.
+ */
 #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
 		__get_kernel_nofault(dst, src, type, err_label);	\
+		kmsan_check_memory(src, sizeof(type));			\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -31,8 +36,6 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!copy_from_kernel_nofault_allowed(src, size))
 		return -ERANGE;
 
-	/* Make sure uninitialized kernel memory isn't copied. */
-	kmsan_check_memory(src, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
@@ -52,6 +55,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
 #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
 		__put_kernel_nofault(dst, src, type, err_label);	\
+		instrument_write(dst, sizeof(type));			\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -64,7 +68,6 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		align = (unsigned long)dst | (unsigned long)src;
 
-	instrument_write(dst, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
Sabyrzhan Tasbolatov Oct. 8, 2024, 8:46 a.m. UTC | #2
On Tue, Oct 8, 2024 at 1:32 PM Marco Elver <elver@google.com> wrote:
>
> On Sat, Oct 05, 2024 at 09:48PM +0500, Sabyrzhan Tasbolatov wrote:
> > Instrument copy_from_kernel_nofault() with KMSAN to check for copying
> > of uninitialized kernel memory, and copy_to_kernel_nofault() with
> > KASAN and KCSAN to detect memory corruption.
> >
> > syzbot reported that the bpf_probe_read_kernel() helper triggered a
> > KASAN report via kasan_check_range(), which is not the expected
> > behaviour, as copy_from_kernel_nofault() is meant to be a non-faulting
> > helper.
> >
> > The solution, suggested by Marco Elver, is to replace the KASAN and
> > KCSAN checks in copy_from_kernel_nofault() with KMSAN detection of
> > copying uninitialized kernel memory. In copy_to_kernel_nofault() we
> > can retain instrument_write() for memory corruption instrumentation,
> > placed before pagefault_disable().
>
> I don't understand why it has to be before the whole copy i.e. before
> pagefault_disable()?
>

I was unsure about this decision as well - I should've waited for your
response to my request for clarification before sending the patch.
Sorry for the confusion; I thought you meant that the instrumentation
was already done after pagefault_disable().

Let me send v3 with your suggested diff; I will also ask Andrew to drop
the patch already merged into -mm:
https://lore.kernel.org/all/20241008020150.4795AC4CEC6@smtp.kernel.org/

Thanks for the review.

> I think my suggestion was to only check the memory where no fault
> occurred. See below.
>
> > diff --git a/mm/maccess.c b/mm/maccess.c
> > index 518a25667323..a91a39a56cfd 100644
> > --- a/mm/maccess.c
> > +++ b/mm/maccess.c
> > @@ -15,7 +15,7 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
> >
> >  #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)        \
> >       while (len >= sizeof(type)) {                                   \
> > -             __get_kernel_nofault(dst, src, type, err_label);                \
> > +             __get_kernel_nofault(dst, src, type, err_label);        \
> >               dst += sizeof(type);                                    \
> >               src += sizeof(type);                                    \
> >               len -= sizeof(type);                                    \
> > @@ -31,6 +31,8 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
> >       if (!copy_from_kernel_nofault_allowed(src, size))
> >               return -ERANGE;
> >
> > +     /* Make sure uninitialized kernel memory isn't copied. */
> > +     kmsan_check_memory(src, size);
> >       pagefault_disable();
> >       if (!(align & 7))
> >               copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
> > @@ -49,7 +51,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
> >
> >  #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)  \
> >       while (len >= sizeof(type)) {                                   \
> > -             __put_kernel_nofault(dst, src, type, err_label);                \
> > +             __put_kernel_nofault(dst, src, type, err_label);        \
> >               dst += sizeof(type);                                    \
> >               src += sizeof(type);                                    \
> >               len -= sizeof(type);                                    \
> > @@ -62,6 +64,7 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
> >       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
> >               align = (unsigned long)dst | (unsigned long)src;
> >
> > +     instrument_write(dst, size);
> >       pagefault_disable();
>
> So this will check the whole range before the access. But if the copy
> aborts because of a fault, then we may still end up with false
> positives.
>
> Why not something like the below - normally we check the accesses
> before, but these are debug kernels anyway, so I see no harm in making
> an exception in this case and checking the memory if there was no fault
> i.e. it didn't jump to err_label yet. It's also slower because of
> repeated calls, but these helpers aren't frequently used.
>
> The alternative is to do the sanitizer check after the entire copy if we
> know there was no fault at all. But that may still hide real bugs if
> e.g. it starts copying some partial memory and then accesses an
> unfaulted page.
>
>
> diff --git a/mm/maccess.c b/mm/maccess.c
> index a91a39a56cfd..3ca55ec63a6a 100644
> --- a/mm/maccess.c
> +++ b/mm/maccess.c
> @@ -13,9 +13,14 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
>         return true;
>  }
>
> +/*
> + * The below only uses kmsan_check_memory() to ensure uninitialized kernel
> + * memory isn't leaked.
> + */
>  #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)  \
>         while (len >= sizeof(type)) {                                   \
>                 __get_kernel_nofault(dst, src, type, err_label);        \
> +               kmsan_check_memory(src, sizeof(type));                  \
>                 dst += sizeof(type);                                    \
>                 src += sizeof(type);                                    \
>                 len -= sizeof(type);                                    \
> @@ -31,8 +36,6 @@ long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
>         if (!copy_from_kernel_nofault_allowed(src, size))
>                 return -ERANGE;
>
> -       /* Make sure uninitialized kernel memory isn't copied. */
> -       kmsan_check_memory(src, size);
>         pagefault_disable();
>         if (!(align & 7))
>                 copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
> @@ -52,6 +55,7 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
>  #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)    \
>         while (len >= sizeof(type)) {                                   \
>                 __put_kernel_nofault(dst, src, type, err_label);        \
> +               instrument_write(dst, sizeof(type));                    \
>                 dst += sizeof(type);                                    \
>                 src += sizeof(type);                                    \
>                 len -= sizeof(type);                                    \
> @@ -64,7 +68,6 @@ long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
>         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
>                 align = (unsigned long)dst | (unsigned long)src;
>
> -       instrument_write(dst, size);
>         pagefault_disable();
>         if (!(align & 7))
>                 copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
Marco Elver Oct. 8, 2024, 9:27 a.m. UTC | #3
On Tue, Oct 08, 2024 at 01:46PM +0500, Sabyrzhan Tasbolatov wrote:
> On Tue, Oct 8, 2024 at 1:32 PM Marco Elver <elver@google.com> wrote:
> >
> > On Sat, Oct 05, 2024 at 09:48PM +0500, Sabyrzhan Tasbolatov wrote:
> > > Instrument copy_from_kernel_nofault() with KMSAN to check for
> > > copying of uninitialized kernel memory, and copy_to_kernel_nofault()
> > > with KASAN and KCSAN to detect memory corruption.
> > >
> > > syzbot reported that the bpf_probe_read_kernel() helper triggered a
> > > KASAN report via kasan_check_range(), which is not the expected
> > > behaviour, as copy_from_kernel_nofault() is meant to be a
> > > non-faulting helper.
> > >
> > > The solution, suggested by Marco Elver, is to replace the KASAN and
> > > KCSAN checks in copy_from_kernel_nofault() with KMSAN detection of
> > > copying uninitialized kernel memory. In copy_to_kernel_nofault() we
> > > can retain instrument_write() for memory corruption instrumentation,
> > > placed before pagefault_disable().
> >
> > I don't understand why it has to be before the whole copy i.e. before
> > pagefault_disable()?
> >
> 
> I was unsure about this decision as well - I should've waited for your
> response to my request for clarification before sending the patch.
> Sorry for the confusion; I thought you meant that the instrumentation
> was already done after pagefault_disable().

I just did some digging and there is some existing instrumentation, but
not for what we want.  The accesses in the loop on x86 do this:

copy_to_kernel_nofault:

	#define __put_kernel_nofault(dst, src, type, err_label)			\
		__put_user_size(*((type *)(src)), (__force type __user *)(dst),	\
				sizeof(type), err_label)


and __put_user_size:

	#define __put_user_size(x, ptr, size, label)				\
	do {									\
		__typeof__(*(ptr)) __x = (x); /* eval x once */			\
		__typeof__(ptr) __ptr = (ptr); /* eval ptr once */		\
		__chk_user_ptr(__ptr);						\
		switch (size) {							\
		case 1:								\
			__put_user_goto(__x, __ptr, "b", "iq", label);		\
			break;							\
		case 2:								\
			__put_user_goto(__x, __ptr, "w", "ir", label);		\
			break;							\
		case 4:								\
			__put_user_goto(__x, __ptr, "l", "ir", label);		\
			break;							\
		case 8:								\
			__put_user_goto_u64(__x, __ptr, label);			\
			break;							\
		default:							\
			__put_user_bad();					\
		}								\
		instrument_put_user(__x, __ptr, size);				\
	} while (0)


which already has an instrument_put_user, which expands to this:

	#define instrument_put_user(from, ptr, size)			\
	({								\
		kmsan_copy_to_user(ptr, &from, sizeof(from), 0);	\
	})

So this is already instrumented for KMSAN, to check no uninitialized
memory is accessed - but that's only useful if copying to user space.
__put_kernel_nofault is "abusing" the same helper to copy to the kernel,
so adding explicit instrumentation as proposed still makes sense.
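
For completeness, instrument_write() is the KASAN/KCSAN hook we'd be
relying on (paraphrased from include/linux/instrumented.h):

static __always_inline void instrument_write(const volatile void *v, size_t size)
{
	/*
	 * Tells KASAN and KCSAN about a plain write of @size bytes -
	 * exactly the check __put_user_size() lacks for kernel
	 * destinations.
	 */
	kasan_check_write(v, size);
	kcsan_check_write(v, size);
}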

Thanks,
-- Marco

Patch

diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c
index a181e4780d9d..5cff90f831db 100644
--- a/mm/kasan/kasan_test_c.c
+++ b/mm/kasan/kasan_test_c.c
@@ -1954,6 +1954,32 @@  static void rust_uaf(struct kunit *test)
 	KUNIT_EXPECT_KASAN_FAIL(test, kasan_test_rust_uaf());
 }
 
+static void copy_to_kernel_nofault_oob(struct kunit *test)
+{
+	char *ptr;
+	char buf[128];
+	size_t size = sizeof(buf);
+
+	/* Detection currently fails with HW_TAGS; disabled for now. */
+	KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS);
+
+	ptr = kmalloc(size - KASAN_GRANULE_SIZE, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+	OPTIMIZER_HIDE_VAR(ptr);
+
+	if (IS_ENABLED(CONFIG_KASAN_SW_TAGS)) {
+		/* Check that the returned pointer is tagged. */
+		KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
+		KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
+	}
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		copy_to_kernel_nofault(&buf[0], ptr, size));
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		copy_to_kernel_nofault(ptr, &buf[0], size));
+	kfree(ptr);
+}
+
 static struct kunit_case kasan_kunit_test_cases[] = {
 	KUNIT_CASE(kmalloc_oob_right),
 	KUNIT_CASE(kmalloc_oob_left),
@@ -2027,6 +2053,7 @@  static struct kunit_case kasan_kunit_test_cases[] = {
 	KUNIT_CASE(match_all_not_assigned),
 	KUNIT_CASE(match_all_ptr_tag),
 	KUNIT_CASE(match_all_mem_tag),
+	KUNIT_CASE(copy_to_kernel_nofault_oob),
 	KUNIT_CASE(rust_uaf),
 	{}
 };
diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c
index 13236d579eba..9733a22c46c1 100644
--- a/mm/kmsan/kmsan_test.c
+++ b/mm/kmsan/kmsan_test.c
@@ -640,6 +640,22 @@  static void test_unpoison_memory(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
 }
 
+static void test_copy_from_kernel_nofault(struct kunit *test)
+{
+	long ret;
+	char buf[4], src[4];
+	size_t size = sizeof(buf);
+
+	EXPECTATION_UNINIT_VALUE_FN(expect, "copy_from_kernel_nofault");
+	kunit_info(
+		test,
+		"testing copy_from_kernel_nofault with uninitialized memory\n");
+
+	ret = copy_from_kernel_nofault((char *)&buf[0], (char *)&src[0], size);
+	USE(ret);
+	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
 static struct kunit_case kmsan_test_cases[] = {
 	KUNIT_CASE(test_uninit_kmalloc),
 	KUNIT_CASE(test_init_kmalloc),
@@ -664,6 +680,7 @@  static struct kunit_case kmsan_test_cases[] = {
 	KUNIT_CASE(test_long_origin_chain),
 	KUNIT_CASE(test_stackdepot_roundtrip),
 	KUNIT_CASE(test_unpoison_memory),
+	KUNIT_CASE(test_copy_from_kernel_nofault),
 	{},
 };
 
diff --git a/mm/maccess.c b/mm/maccess.c
index 518a25667323..a91a39a56cfd 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -15,7 +15,7 @@  bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
 
 #define copy_from_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
-		__get_kernel_nofault(dst, src, type, err_label);		\
+		__get_kernel_nofault(dst, src, type, err_label);	\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -31,6 +31,8 @@  long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!copy_from_kernel_nofault_allowed(src, size))
 		return -ERANGE;
 
+	/* Make sure uninitialized kernel memory isn't copied. */
+	kmsan_check_memory(src, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
@@ -49,7 +51,7 @@  EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
 
 #define copy_to_kernel_nofault_loop(dst, src, len, type, err_label)	\
 	while (len >= sizeof(type)) {					\
-		__put_kernel_nofault(dst, src, type, err_label);		\
+		__put_kernel_nofault(dst, src, type, err_label);	\
 		dst += sizeof(type);					\
 		src += sizeof(type);					\
 		len -= sizeof(type);					\
@@ -62,6 +64,7 @@  long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		align = (unsigned long)dst | (unsigned long)src;
 
+	instrument_write(dst, size);
 	pagefault_disable();
 	if (!(align & 7))
 		copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);