diff mbox

KVM: eventfd: fix NULL deref irqbypass consumer

Message ID 1483607111-2780-1-git-send-email-wanpeng.li@hotmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wanpeng Li Jan. 5, 2017, 9:05 a.m. UTC
From: Wanpeng Li <wanpeng.li@hotmail.com>

Reported syzkaller:

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
    IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    PGD 0 
   
    Oops: 0002 [#1] SMP
    CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1
    Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
    task: ffff9bbe0dfbb900 task.stack: ffffb61802014000
    RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
    Call Trace:
     irqfd_shutdown+0x66/0xa0 [kvm]
     process_one_work+0x16b/0x480
     worker_thread+0x4b/0x500
     kthread+0x101/0x140
     ? process_one_work+0x480/0x480
     ? kthread_create_on_node+0x60/0x60
     ret_from_fork+0x25/0x30
    RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20
    CR2: 0000000000000008

The syzkaller folks reported a NULL pointer dereference that due to unregister 
an consumer which fails registration before. The syzkaller creates two VMs w/ 
an equal eventfd occassionally. So the second VM fails to register an irqbypass 
consumer. It will make irqfd as inactive and queue an workqueue work to shutdown 
irqfd and unregister the irqbypass consumer when eventfd is closed. However, the 
second consumer has been initialized though it fails registration. So the token
(same as the first VM's) is taken to unregister the consumer in the workqueue, 
the consumer of the first VM is found and unregistered, then NULL deref incurred 
in the path of deleting consumer from the consumers list.

#include <fcntl.h>
#include <pthread.h>
#include <setjmp.h>
#include <signal.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

 __thread int skip_segv;
 __thread jmp_buf segv_env;

static void segv_handler(int sig, siginfo_t* info, void* uctx)
{
	if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED))
		_longjmp(segv_env, 1);
	exit(sig);
}

static void install_segv_handler()
{
	struct sigaction sa;
	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_NODEFER | SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);
	sigaction(SIGBUS, &sa, NULL);
}

#define NONFAILING(...)                                                \
{                                                                    \
	__atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST);               \
	if (_setjmp(segv_env) == 0) {                                      \
		__VA_ARGS__;                                                     \
	}                                                                  \
	__atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST);               \
}

static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1,
		uintptr_t a2, uintptr_t a3,
		uintptr_t a4, uintptr_t a5,
		uintptr_t a6, uintptr_t a7,
		uintptr_t a8)
{
	return syscall(nr, a0, a1, a2, a3, a4, a5);
}

long r[28];
void* thr(void* arg)
{
	switch ((long)arg) {
		case 0:
			r[0] =
				execute_syscall(__NR_mmap, 0x20000000ul, 0xd000ul, 0x3ul,
						0x32ul, 0xfffffffffffffffful, 0x0ul, 0, 0, 0);
			break;
		case 1:
			r[2] = syscall(__NR_open, "/dev/kvm", 0x40042ul, 0, 0, 0, 0, 0, 0);
			break;
		case 2:
			r[3] = execute_syscall(__NR_ioctl, r[2], 0xae01ul, 0x0ul, 0, 0, 0,
					0, 0, 0);
			break;
		case 3:
			r[4] = execute_syscall(__NR_ioctl, r[3], 0xae41ul, 0x3fful, 0, 0, 0,
					0, 0, 0);
			break;
		case 4:
			r[5] = execute_syscall(__NR_ioctl, r[4], 0xae9aul, 0, 0, 0, 0, 0, 0,
					0);
			break;
		case 5:
			r[6] = execute_syscall(__NR_eventfd2, 0x8ul, 0x801ul, 0, 0, 0, 0, 0,
					0, 0);
			break;
		case 6:
			NONFAILING(*(uint32_t*)0x2000c000 = r[6]);
			NONFAILING(*(uint32_t*)0x2000c004 = (uint32_t)0x98cd);
			NONFAILING(*(uint32_t*)0x2000c008 = (uint32_t)0x0);
			NONFAILING(*(uint32_t*)0x2000c00c = (uint32_t)0xffffffffffffffff);
			NONFAILING(*(uint8_t*)0x2000c010 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c011 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c012 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c013 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c014 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c015 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c016 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c017 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c018 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c019 = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01a = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01b = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01c = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01d = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01e = (uint8_t)0x0);
			NONFAILING(*(uint8_t*)0x2000c01f = (uint8_t)0x0);
			r[27] = execute_syscall(__NR_ioctl, r[3], 0x4020ae76ul,
					0x2000c000ul, 0, 0, 0, 0, 0, 0);
			break;
	}
	return 0;
}

int main()
{
	long i;
	pthread_t th[14];

	install_segv_handler();
	memset(r, -1, sizeof(r));
	srand(getpid());
	for (i = 0; i < 7; i++) {
		pthread_create(&th[i], 0, thr, (void*)i);
		usleep(10000);
	}
	for (i = 0; i < 7; i++) {
		pthread_create(&th[7 + i], 0, thr, (void*)i);
		if (rand() % 2)
			usleep(rand() % 10000);
	}
	usleep(100000);
	return 0;
}

This patch fix it by destroying the consumer which just fails registration.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 virt/kvm/eventfd.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

Comments

Paolo Bonzini Jan. 5, 2017, 10:24 a.m. UTC | #1
On 05/01/2017 10:05, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> Reported syzkaller:
> 
>     BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
>     IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
>     PGD 0 
>    
>     Oops: 0002 [#1] SMP
>     CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1
>     Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
>     task: ffff9bbe0dfbb900 task.stack: ffffb61802014000
>     RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
>     Call Trace:
>      irqfd_shutdown+0x66/0xa0 [kvm]
>      process_one_work+0x16b/0x480
>      worker_thread+0x4b/0x500
>      kthread+0x101/0x140
>      ? process_one_work+0x480/0x480
>      ? kthread_create_on_node+0x60/0x60
>      ret_from_fork+0x25/0x30
>     RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20
>     CR2: 0000000000000008
> 
> The syzkaller folks reported a NULL pointer dereference that due to unregister 
> an consumer which fails registration before. The syzkaller creates two VMs w/ 
> an equal eventfd occassionally. So the second VM fails to register an irqbypass 
> consumer. It will make irqfd as inactive and queue an workqueue work to shutdown 
> irqfd and unregister the irqbypass consumer when eventfd is closed. However, the 
> second consumer has been initialized though it fails registration. So the token
> (same as the first VM's) is taken to unregister the consumer in the workqueue, 
> the consumer of the first VM is found and unregistered, then NULL deref incurred 
> in the path of deleting consumer from the consumers list.

Thanks Wanpend!

However, I'm wondering if we should improve the API instead.  Maybe the
token should be passed to irq_bypass_register_{producer,consumer} and
only assigned if the functions succeed.  Alex, what do you think?

Thanks,

Paolo

> 
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index a29786d..eeaf056 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -415,9 +415,15 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
>  		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
>  		irqfd->consumer.start = kvm_arch_irq_bypass_start;
>  		ret = irq_bypass_register_consumer(&irqfd->consumer);
> -		if (ret)
> +		if (ret) {
>  			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
>  				irqfd->consumer.token, ret);
> +			irqfd->consumer.token = NULL;
> +			irqfd->consumer.add_producer = NULL;
> +			irqfd->consumer.del_producer = NULL;
> +			irqfd->consumer.stop = NULL;
> +			irqfd->consumer.start = NULL;
> +		}
>  	}
>  #endif
>  
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Jan. 5, 2017, 10:51 a.m. UTC | #2
2017-01-05 18:24 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>
>
> On 05/01/2017 10:05, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> Reported syzkaller:
>>
>>     BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
>>     IP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
>>     PGD 0
>>
>>     Oops: 0002 [#1] SMP
>>     CPU: 1 PID: 125 Comm: kworker/1:1 Not tainted 4.9.0+ #1
>>     Workqueue: kvm-irqfd-cleanup irqfd_shutdown [kvm]
>>     task: ffff9bbe0dfbb900 task.stack: ffffb61802014000
>>     RIP: 0010:irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass]
>>     Call Trace:
>>      irqfd_shutdown+0x66/0xa0 [kvm]
>>      process_one_work+0x16b/0x480
>>      worker_thread+0x4b/0x500
>>      kthread+0x101/0x140
>>      ? process_one_work+0x480/0x480
>>      ? kthread_create_on_node+0x60/0x60
>>      ret_from_fork+0x25/0x30
>>     RIP: irq_bypass_unregister_consumer+0x9d/0xb70 [irqbypass] RSP: ffffb61802017e20
>>     CR2: 0000000000000008
>>
>> The syzkaller folks reported a NULL pointer dereference that due to unregister
>> an consumer which fails registration before. The syzkaller creates two VMs w/
>> an equal eventfd occassionally. So the second VM fails to register an irqbypass
>> consumer. It will make irqfd as inactive and queue an workqueue work to shutdown
>> irqfd and unregister the irqbypass consumer when eventfd is closed. However, the
>> second consumer has been initialized though it fails registration. So the token
>> (same as the first VM's) is taken to unregister the consumer in the workqueue,
>> the consumer of the first VM is found and unregistered, then NULL deref incurred
>> in the path of deleting consumer from the consumers list.
>
> Thanks Wanpend!

NP. :)

>
> However, I'm wondering if we should improve the API instead.  Maybe the
> token should be passed to irq_bypass_register_{producer,consumer} and
> only assigned if the functions succeed.  Alex, what do you think?

Yeah, I can continue to improve the patch if we reach an agreement with Alex. :)

Regards,
Wanpeng Li
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786d..eeaf056 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -415,9 +415,15 @@  kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
 		irqfd->consumer.start = kvm_arch_irq_bypass_start;
 		ret = irq_bypass_register_consumer(&irqfd->consumer);
-		if (ret)
+		if (ret) {
 			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
 				irqfd->consumer.token, ret);
+			irqfd->consumer.token = NULL;
+			irqfd->consumer.add_producer = NULL;
+			irqfd->consumer.del_producer = NULL;
+			irqfd->consumer.stop = NULL;
+			irqfd->consumer.start = NULL;
+		}
 	}
 #endif