@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g -I../../../../usr/include/ -pthread
TEST_GEN_PROGS := close_range_test
@@ -3,15 +3,22 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
-#include <linux/kernel.h>
#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include <syscall.h>
#include <unistd.h>
-#include <sys/resource.h>
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
@@ -384,4 +391,224 @@ TEST(close_range_cloexec_unshare)
}
}
+/*
+ * Return the current CLOCK_MONOTONIC reading converted to milliseconds.
+ * Terminates the process with EXIT_FAILURE if the clock cannot be read.
+ */
+static uint64_t current_time_ms(void)
+{
+	struct timespec now;
+	uint64_t millis;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
+		exit(EXIT_FAILURE);
+
+	millis = (uint64_t)now.tv_sec * 1000;
+	millis += (uint64_t)now.tv_nsec / 1000000;
+
+	return millis;
+}
+
+/*
+ * Start a thread running fn(arg) with a 128 KiB stack.
+ *
+ * Creation is attempted up to 100 times, sleeping 50 microseconds between
+ * attempts for as long as pthread_create() reports EAGAIN. Any other error,
+ * or running out of attempts, terminates the process with EXIT_FAILURE.
+ * The thread handle is not kept, so the thread is never joined.
+ */
+static void thread_start(void *(*fn)(void *), void *arg)
+{
+	int i, err;
+	pthread_t th;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	pthread_attr_setstacksize(&attr, 128 << 10);
+
+	for (i = 0; i < 100; i++) {
+		/*
+		 * pthread_create() returns the error number directly and
+		 * does not set errno, so the return value is what must be
+		 * inspected to decide whether to retry.
+		 */
+		err = pthread_create(&th, &attr, fn, arg);
+		if (err == 0) {
+			pthread_attr_destroy(&attr);
+			return;
+		}
+
+		if (err != EAGAIN)
+			break;
+
+		usleep(50);
+	}
+
+	pthread_attr_destroy(&attr);
+	exit(EXIT_FAILURE);
+}
+
+/* Put the event at *state into its initial, unsignalled state (0). */
+static void event_init(int *state)
+{
+	state[0] = 0;
+}
+
+/*
+ * Clear the event at *state. event_set() exits the process when the event
+ * is already set, so an event has to be cleared before it is set again.
+ */
+static void event_reset(int *state)
+{
+	const int unsignalled = 0;
+
+	*state = unsignalled;
+}
+
+/*
+ * Signal the event at *state: store 1 with release ordering and issue a
+ * private FUTEX_WAKE on it for up to 1000000 waiters.
+ *
+ * Setting an event that is already non-zero is treated as a fatal error
+ * and terminates the process with EXIT_FAILURE.
+ */
+static void event_set(int *state)
+{
+	const int wake_op = FUTEX_WAKE | FUTEX_PRIVATE_FLAG;
+
+	if (*state != 0)
+		exit(EXIT_FAILURE);
+
+	__atomic_store_n(state, 1, __ATOMIC_RELEASE);
+	syscall(SYS_futex, state, wake_op, 1000000);
+}
+
+/*
+ * Block until the event at *state is signalled.
+ *
+ * The value is read with acquire ordering; while it is 0 the caller sleeps
+ * in a private FUTEX_WAIT without timeout and re-checks once the futex call
+ * returns.
+ */
+static void event_wait(int *state)
+{
+	for (;;) {
+		if (__atomic_load_n(state, __ATOMIC_ACQUIRE))
+			return;
+
+		syscall(SYS_futex, state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
+	}
+}
+
+/*
+ * Return the current value of the event at *state (non-zero when it is
+ * signalled), read with acquire ordering. Never blocks.
+ */
+static int event_isset(int *state)
+{
+	int value = __atomic_load_n(state, __ATOMIC_ACQUIRE);
+
+	return value;
+}
+
+/*
+ * Wait for the event at *state to be signalled, for at most @timeout
+ * milliseconds measured with current_time_ms().
+ *
+ * Each round sleeps in a private FUTEX_WAIT for the time that is still
+ * left, then re-checks the event with acquire ordering.
+ *
+ * Returns 1 if the event was observed set, 0 once more than @timeout
+ * milliseconds have elapsed without that happening.
+ */
+static int event_timedwait(int *state, uint64_t timeout)
+{
+	const uint64_t begin = current_time_ms();
+	uint64_t elapsed = 0;
+
+	while (elapsed <= timeout) {
+		uint64_t left = timeout - elapsed;
+		struct timespec wait;
+
+		wait.tv_sec = left / 1000;
+		wait.tv_nsec = (left % 1000) * 1000 * 1000;
+
+		syscall(SYS_futex, state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &wait);
+
+		if (__atomic_load_n(state, __ATOMIC_ACQUIRE))
+			return 1;
+
+		elapsed = current_time_ms() - begin;
+	}
+
+	return 0;
+}
+
+/*
+ * Bookkeeping for one worker thread, shared between threaded_close_range()
+ * (the dispatcher) and thread_close_range() (the worker).
+ */
+struct thread_t {
+ /* Set to 1 by the dispatcher right before it starts the worker. */
+ int created;
+ /* Operation index the worker passes to thread_close_range_call(). */
+ int call;
+ /* Event set by the dispatcher to hand work to the worker. */
+ int ready;
+ /* Event set by the worker after it has finished a call. */
+ int done;
+};
+
+/* Fixed pool of worker slots used by threaded_close_range(). */
+static struct thread_t threads[4];
+/*
+ * Number of calls handed to workers that have not completed yet:
+ * incremented by the dispatcher, decremented by the worker after the call.
+ */
+static int running;
+
+/*
+ * Execute one step of the reproducer, selected by @call:
+ *   0 - open /dev/null; the resulting descriptor is left open
+ *   1 - close_range(0, -1) with CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC
+ * Any other value does nothing. Results of the calls are not reported.
+ */
+static void thread_close_range_call(int call)
+{
+	int first_fd = 0;
+
+	if (call == 0) {
+		/* Only the side effect matters; the descriptor is not used. */
+		first_fd = openat(-1, "/dev/null", 0, 0);
+		if (first_fd < 0)
+			first_fd = 0;
+		return;
+	}
+
+	if (call == 1)
+		sys_close_range(first_fd, -1,
+				CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC);
+}
+
+/*
+ * Worker thread body; @arg points to this worker's struct thread_t.
+ *
+ * Loops forever: wait for ->ready, clear it, run the requested call,
+ * decrement the global running counter and then signal ->done.
+ */
+static void *thread_close_range(void *arg)
+{
+	struct thread_t *self = arg;
+
+	while (1) {
+		event_wait(&self->ready);
+		event_reset(&self->ready);
+
+		thread_close_range_call(self->call);
+
+		__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
+		event_set(&self->done);
+	}
+
+	return NULL;
+}
+
+/*
+ * Run the two reproducer calls (0, then 1) on worker threads.
+ *
+ * For each call the first worker slot whose ->done event is set is picked;
+ * slots are created lazily on first visit. The call is handed over through
+ * ->ready and the dispatcher waits at most 45 ms for ->done before moving
+ * on, so a slow call may still be in flight when the next one is issued.
+ *
+ * Afterwards the function polls the running counter for up to 100 rounds
+ * of 1 ms and finally closes file descriptors 3..29.
+ */
+static void threaded_close_range(void)
+{
+	const int nr_threads = (int)(sizeof(threads) / sizeof(threads[0]));
+	int op, slot, spins, fd;
+
+	for (op = 0; op < 2; op++) {
+		for (slot = 0; slot < nr_threads; slot++) {
+			struct thread_t *worker = &threads[slot];
+
+			if (!worker->created) {
+				worker->created = 1;
+				event_init(&worker->ready);
+				event_init(&worker->done);
+				/* A fresh worker counts as idle. */
+				event_set(&worker->done);
+				thread_start(thread_close_range, worker);
+			}
+
+			if (event_isset(&worker->done)) {
+				event_reset(&worker->done);
+				worker->call = op;
+				__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
+				event_set(&worker->ready);
+				event_timedwait(&worker->done, 45);
+				break;
+			}
+		}
+	}
+
+	spins = 0;
+	while (spins < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED)) {
+		usleep(1000);
+		spins++;
+	}
+
+	fd = 3;
+	while (fd < 30)
+		close(fd++);
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ *
+ * Each iteration forks a child that runs threaded_close_range() and polls
+ * for its termination for up to five seconds; a child that overruns is
+ * killed and reaped. Every child is expected to exit normally with status 0.
+ */
+TEST(close_range_cloexec_unshare_threaded_syzbot)
+{
+	int iter;
+	int fd1, fd3;
+
+	/*
+	 * Create a huge gap in the fd table. When we now call
+	 * CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper
+	 * bound the kernel will only copy up to fd1 file descriptors into the
+	 * new fd table. If max_fd in the close_range() codepaths isn't
+	 * correctly set when requesting CLOSE_RANGE_CLOEXEC with all of these
+	 * fds we will see NULL pointer derefs!
+	 */
+	fd1 = open("/dev/null", O_RDWR);
+	EXPECT_GT(fd1, 0);
+
+	fd3 = dup2(fd1, 1000);
+	EXPECT_GT(fd3, 0);
+
+	for (iter = 0; iter <= 1000; iter++) {
+		pid_t pid;
+		int status;
+		uint64_t start;
+
+		pid = fork();
+		if (pid < 0)
+			exit(EXIT_FAILURE);
+		if (pid == 0) {
+			EXPECT_EQ(prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0), 0);
+			setpgrp();
+
+			threaded_close_range();
+			exit(EXIT_SUCCESS);
+		}
+
+		status = 0;
+		start = current_time_ms();
+		for (;;) {
+			if (waitpid(-1, &status, WNOHANG | __WALL) == pid)
+				break;
+
+			usleep(1000);
+
+			if (current_time_ms() - start < 5 * 1000)
+				continue;
+
+			/*
+			 * The child overran its budget: kill it, reap it and
+			 * stop polling. Without leaving the loop here the
+			 * already reaped pid would be waited for and
+			 * signalled again forever.
+			 */
+			kill(pid, SIGKILL);
+			EXPECT_EQ(waitpid(pid, &status, 0), pid);
+			break;
+		}
+
+		/*
+		 * Checked for every child, whether it finished on its own or
+		 * had to be killed, so that a crashed or hung child fails the
+		 * test.
+		 */
+		EXPECT_EQ(true, WIFEXITED(status));
+		EXPECT_EQ(0, WEXITSTATUS(status));
+	}
+}
+
TEST_HARNESS_MAIN
This test is a minimized version of the reproducer given by syzbot (cf. [1]). After introducing CLOSE_RANGE_CLOEXEC, syzbot reported a crash when CLOSE_RANGE_CLOEXEC is specified in conjunction with CLOSE_RANGE_UNSHARE. When CLOSE_RANGE_UNSHARE is specified the caller will receive a private file descriptor table in case their file descriptor table is currently shared. When the caller requests that all file descriptors are supposed to be operated on via e.g. a call like close_range(3, ~0U) and the caller shares their file descriptor table then the kernel will only copy all files in the range from 0 to 3 and no others. The original bug used the maximum of the old file descriptor table, not the new one. In order to test this bug we need to first create a huge gap in the fd table. When we now call CLOSE_RANGE_UNSHARE with a shared fd table and with ~0U as upper bound the kernel will only copy up to fd1 file descriptors into the new fd table. If max_fd in the close_range() codepaths isn't correctly set when requesting CLOSE_RANGE_CLOEXEC with all of these fds we will see NULL pointer derefs! This test passes on a fixed kernel. Cc: Giuseppe Scrivano <gscrivan@redhat.com> [1]: https://syzkaller.appspot.com/text?tag=KernelConfig&x=db720fe37a6a41d8 Link: syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> --- tools/testing/selftests/core/Makefile | 2 +- .../testing/selftests/core/close_range_test.c | 231 +++++++++++++++++- 2 files changed, 230 insertions(+), 3 deletions(-)