From patchwork Fri Feb 3 15:30:05 2017
X-Patchwork-Submitter: Hoeun Ryu
X-Patchwork-Id: 9554439
From: Hoeun Ryu
To: Andrew Morton, Michal Hocko, Ingo Molnar, Andy Lutomirski, Kees Cook,
 "Eric W. Biederman", Mateusz Guzik
Cc: linux-kernel@vger.kernel.org, kernel-hardening@lists.openwall.com,
 Hoeun Ryu
Date: Sat, 4 Feb 2017 00:30:05 +0900
Message-Id: <1486135892-27249-1-git-send-email-hoeun.ryu@gmail.com>
X-Mailer: git-send-email 2.7.4
Subject: [kernel-hardening] [PATCH 1/3] fork: dynamically allocate cache
 array for vmapped stacks using cpuhp

Using virtually mapped stacks, kernel stacks are allocated via vmalloc.
In the current implementation, two stacks per CPU can be cached when
tasks are freed, and the cached stacks are reused in task duplication.
However, the array for the cached stacks is statically allocated by the
per-cpu API. In this new implementation, the array for the cached stacks
is dynamically allocated and freed by CPU hotplug callbacks, and the
cached stacks are freed when a CPU goes down. The CPU hotplug setup is
established in fork_init().

Signed-off-by: Hoeun Ryu
---
 kernel/fork.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 64 insertions(+), 17 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 61284d8..54421a9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -167,26 +167,71 @@ void __weak arch_release_thread_stack(unsigned long *stack)
  * flush.  Try to minimize the number of calls by caching stacks.
  */
 #define NR_CACHED_STACKS 2
-static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+
+struct vm_stack_cache {
+	struct vm_struct **vm_stacks;
+	int nr;
+	int cur;
+};
+
+static DEFINE_PER_CPU(struct vm_stack_cache, vm_stacks);
+
+static int alloc_vm_stack_cache(unsigned int cpu)
+{
+	struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
+	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
+	int i;
+
+	/* if free_vm_stack_cache() didn't free it */
+	if (!vm_stacks) {
+		vm_stacks =
+			vzalloc(sizeof(struct vm_struct *) * NR_CACHED_STACKS);
+		if (!vm_stacks)
+			return -ENOMEM;
+	}
+
+	vm_stack_cache->vm_stacks = vm_stacks;
+	vm_stack_cache->cur = 0;
+	vm_stack_cache->nr = 0;
+
+	return 0;
+}
+
+static int free_vm_stack_cache(unsigned int cpu)
+{
+	struct vm_stack_cache *vm_stack_cache = &per_cpu(vm_stacks, cpu);
+	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
+	int i;
+
+	for (i = 0; i < vm_stack_cache->nr; i++) {
+		vfree(vm_stacks[i]->addr);
+		vm_stacks[i] = NULL;
+	}
+
+	vm_stack_cache->nr = 0;
+	vm_stack_cache->cur = 0;
+	/* do not free vm_stack[cpu]->vm_stacks itself, reused in allocation */
+
+	return 0;
+}
+
 #endif
 
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 					      int node)
 {
 #ifdef CONFIG_VMAP_STACK
+	struct vm_stack_cache *vm_stack_cache =
+		&per_cpu(vm_stacks, smp_processor_id());
+	struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
 	void *stack;
-	int i;
 
 	local_irq_disable();
-	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
-
-		if (!s)
-			continue;
-		this_cpu_write(cached_stacks[i], NULL);
-
-		tsk->stack_vm_area = s;
+	if (vm_stack_cache->cur > 0) {
+		struct vm_struct *vm_stack = vm_stacks[--vm_stack_cache->cur];
+		tsk->stack_vm_area = vm_stack;
 		local_irq_enable();
-		return s->addr;
+
+		return vm_stack->addr;
 	}
 	local_irq_enable();
 
@@ -216,15 +261,14 @@ static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
 	if (task_stack_vm_area(tsk)) {
+		struct vm_stack_cache *vm_stack_cache =
+			&per_cpu(vm_stacks, smp_processor_id());
+		struct vm_struct **vm_stacks = vm_stack_cache->vm_stacks;
 		unsigned long flags;
-		int i;
 
 		local_irq_save(flags);
-		for (i = 0; i < NR_CACHED_STACKS; i++) {
-			if (this_cpu_read(cached_stacks[i]))
-				continue;
-
-			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
+		if (vm_stack_cache->cur < vm_stack_cache->nr) {
+			vm_stacks[vm_stack_cache->cur++] = tsk->stack_vm_area;
 			local_irq_restore(flags);
 			return;
 		}
@@ -456,6 +500,9 @@ void __init fork_init(void)
 	for (i = 0; i < UCOUNT_COUNTS; i++) {
 		init_user_ns.ucount_max[i] = max_threads/2;
 	}
+
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "vm_stack_cache",
+			  alloc_vm_stack_cache, free_vm_stack_cache);
 }
 
 int __weak arch_dup_task_struct(struct task_struct *dst,
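
For reference, the cpuhp_setup_state() pattern this patch relies on can be
sketched in isolation. This is a minimal sketch only, not part of the patch:
the names demo_cache, demo_cpu_up(), demo_cpu_down() and demo_init() are
hypothetical and exist purely to show the callback contract of a dynamically
allocated per-cpu resource tied to CPU hotplug.

/*
 * Illustrative sketch (hypothetical names): a per-cpu buffer that is
 * allocated when a CPU comes online and released when it goes down,
 * registered through a dynamically assigned hotplug state.
 */
#include <linux/cpuhotplug.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/slab.h>

struct demo_cache {
	void *buf;
};

static DEFINE_PER_CPU(struct demo_cache, demo_cache);

/* Startup callback: runs for every CPU coming online, and also once for
 * each CPU that is already online when the state is registered. */
static int demo_cpu_up(unsigned int cpu)
{
	struct demo_cache *c = &per_cpu(demo_cache, cpu);

	if (!c->buf) {
		c->buf = kzalloc(256, GFP_KERNEL);
		if (!c->buf)
			return -ENOMEM;
	}
	return 0;
}

/* Teardown callback: runs when a CPU goes down; drop what that CPU cached. */
static int demo_cpu_down(unsigned int cpu)
{
	struct demo_cache *c = &per_cpu(demo_cache, cpu);

	kfree(c->buf);
	c->buf = NULL;
	return 0;
}

static int __init demo_init(void)
{
	int ret;

	/* CPUHP_AP_ONLINE_DYN requests a dynamically allocated hotplug
	 * state; on success cpuhp_setup_state() returns the assigned state
	 * number, on failure a negative errno. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
				demo_cpu_up, demo_cpu_down);
	return ret < 0 ? ret : 0;
}

In the patch itself, the equivalent registration happens in fork_init(),
with alloc_vm_stack_cache() and free_vm_stack_cache() filling the roles of
the two callbacks.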