diff mbox series

[RFC,2/2] kfence: Alloc kfence_pool after system startup

Message ID 20220303031505.28495-3-dtcccc@linux.alibaba.com (mailing list archive)
State New
Headers show
Series Alloc kfence_pool after system startup | expand

Commit Message

Tianchen Ding March 3, 2022, 3:15 a.m. UTC
KFENCE aims at production environments, but it does not allow enabling
after system startup because kfence_pool only alloc pages from memblock.
Consider the following production scene:
At first, for performance considerations, production machines do not
enable KFENCE.
However, after running for a while, the kernel is suspected to have
memory errors. (e.g., a sibling machine crashed.)
So other production machines need to enable KFENCE, but it's hard for
them to reboot.

Allow enabling KFENCE by alloc pages after system startup, even if
KFENCE is not enabled during booting.

Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
---
This patch is similar to what the KFENCE(early version) do on ARM64.
Instead of alloc_pages(), we'd prefer alloc_contig_pages() to get exact
number of pages.
I'm not sure about the impact of breaking __ro_after_init. I've tested
with hackbench, and it seems no performance regression.
Or any problem about security?
---
 mm/kfence/core.c | 96 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 20 deletions(-)

Comments

Marco Elver March 4, 2022, 6:14 p.m. UTC | #1
On Thu, 3 Mar 2022 at 04:15, Tianchen Ding <dtcccc@linux.alibaba.com> wrote:
>
> KFENCE aims at production environments, but it does not allow enabling
> after system startup because kfence_pool only alloc pages from memblock.
> Consider the following production scene:
> At first, for performance considerations, production machines do not
> enable KFENCE.
> However, after running for a while, the kernel is suspected to have
> memory errors. (e.g., a sibling machine crashed.)
> So other production machines need to enable KFENCE, but it's hard for
> them to reboot.

I think having this flexibility isn't bad, but your usecase just
doesn't make sense (to us at least, based on our experience).

So I would simply remove the above as it will give folks the wrong
impression. The below paragraph can be improved a little, but should
be enough.

> Allow enabling KFENCE by alloc pages after system startup, even if
> KFENCE is not enabled during booting.

The above doesn't parse very well -- my suggestion:
  "Allow enabling KFENCE after system startup by allocating its pool
via the page allocator. This provides the flexibility to enable KFENCE
even if it wasn't enabled at boot time."

> Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
> ---
> This patch is similar to what the KFENCE(early version) do on ARM64.
> Instead of alloc_pages(), we'd prefer alloc_contig_pages() to get exact
> number of pages.
> I'm not sure about the impact of breaking __ro_after_init. I've tested
> with hackbench, and it seems no performance regression.
> Or any problem about security?

Performance would be the main consideration. However, I think
__read_mostly should be as good as __ro_after_init in terms of
performance.

> ---
>  mm/kfence/core.c | 96 ++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 76 insertions(+), 20 deletions(-)
>
> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
> index 19eb123c0bba..ae69b2a113a4 100644
> --- a/mm/kfence/core.c
> +++ b/mm/kfence/core.c
> @@ -93,7 +93,7 @@ static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
>  module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
>
>  /* The pool of pages used for guard pages and objects. */
> -char *__kfence_pool __ro_after_init;
> +char *__kfence_pool __read_mostly;
>  EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
>
>  /*
> @@ -534,17 +534,18 @@ static void rcu_guarded_free(struct rcu_head *h)
>         kfence_guarded_free((void *)meta->addr, meta, false);
>  }
>
> -static bool __init kfence_init_pool(void)
> +/*
> + * The main part of init kfence pool.

"Initialization of the KFENCE pool after its allocation."

> + * Return 0 if succeed. Otherwise return the address where error occurs.

"Return 0 on success; otherwise returns the address up to which
partial initialization succeeded."

> + */
> +static unsigned long __kfence_init_pool(void)

Keep this function simply named 'kfence_init_pool()' - it's a static
function, and we can be more descriptive with the other function
names.

>  {
>         unsigned long addr = (unsigned long)__kfence_pool;
>         struct page *pages;
>         int i;
>
> -       if (!__kfence_pool)
> -               return false;
> -
>         if (!arch_kfence_init_pool())
> -               goto err;
> +               return addr;
>
>         pages = virt_to_page(addr);
>
> @@ -562,7 +563,7 @@ static bool __init kfence_init_pool(void)
>
>                 /* Verify we do not have a compound head page. */
>                 if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
> -                       goto err;
> +                       return addr;
>
>                 __SetPageSlab(&pages[i]);
>         }
> @@ -575,7 +576,7 @@ static bool __init kfence_init_pool(void)
>          */
>         for (i = 0; i < 2; i++) {
>                 if (unlikely(!kfence_protect(addr)))
> -                       goto err;
> +                       return addr;
>
>                 addr += PAGE_SIZE;
>         }
> @@ -592,7 +593,7 @@ static bool __init kfence_init_pool(void)
>
>                 /* Protect the right redzone. */
>                 if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
> -                       goto err;
> +                       return addr;
>
>                 addr += 2 * PAGE_SIZE;
>         }
> @@ -605,9 +606,21 @@ static bool __init kfence_init_pool(void)
>          */
>         kmemleak_free(__kfence_pool);
>
> -       return true;
> +       return 0;
> +}
> +
> +static bool __init kfence_init_pool(void)

Just call this kfence_init_pool_early().

> +{
> +       unsigned long addr;
> +
> +       if (!__kfence_pool)
> +               return false;
> +
> +       addr = __kfence_init_pool();
> +
> +       if (!addr)
> +               return true;
>
> -err:
>         /*
>          * Only release unprotected pages, and do not try to go back and change
>          * page attributes due to risk of failing to do so as well. If changing
> @@ -620,6 +633,22 @@ static bool __init kfence_init_pool(void)
>         return false;
>  }
>
> +static bool kfence_init_pool_late(void)
> +{
> +       unsigned long addr, free_pages;
> +
> +       addr = __kfence_init_pool();
> +
> +       if (!addr)
> +               return true;
> +
> +       /* Same as above. */
> +       free_pages = (KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)) / PAGE_SIZE;
> +       free_contig_range(page_to_pfn(virt_to_page(addr)), free_pages);
> +       __kfence_pool = NULL;
> +       return false;
> +}
> +
>  /* === DebugFS Interface ==================================================== */
>
>  static int stats_show(struct seq_file *seq, void *v)
> @@ -768,31 +797,58 @@ void __init kfence_alloc_pool(void)
>                 pr_err("failed to allocate pool\n");
>  }
>
> +static inline void __kfence_init(void)

Don't make this 'inline', I see no reason for it. If the compiler
thinks it's really worth inlining, it'll do it anyway.

Also, just call it 'kfence_init_enable()' (sprinkling '__' everywhere
really doesn't improve readability if we can avoid it).

> +{
> +       if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
> +               static_branch_enable(&kfence_allocation_key);
> +       WRITE_ONCE(kfence_enabled, true);
> +       queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> +       pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
> +               CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
> +               (void *)(__kfence_pool + KFENCE_POOL_SIZE));
> +}
> +
>  void __init kfence_init(void)
>  {
> +       stack_hash_seed = (u32)random_get_entropy();
> +
>         /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
>         if (!kfence_sample_interval)
>                 return;
>
> -       stack_hash_seed = (u32)random_get_entropy();
>         if (!kfence_init_pool()) {
>                 pr_err("%s failed\n", __func__);
>                 return;
>         }
>
> -       if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
> -               static_branch_enable(&kfence_allocation_key);
> -       WRITE_ONCE(kfence_enabled, true);
> -       queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> -       pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
> -               CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
> -               (void *)(__kfence_pool + KFENCE_POOL_SIZE));
> +       __kfence_init();
> +}
> +
> +static int kfence_init_late(void)
> +{
> +       struct page *pages;
> +       const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;

Order 'nr_pages' above 'pages' (reverse xmas-tree).


> +       pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
> +
> +       if (!pages)
> +               return -ENOMEM;
> +
> +       __kfence_pool = page_to_virt(pages);
> +
> +       if (!kfence_init_pool_late()) {
> +               pr_err("%s failed\n", __func__);
> +               return -EBUSY;
> +       }
> +
> +       __kfence_init();
> +       return 0;
>  }
>
>  static int kfence_enable_late(void)
>  {
>         if (!__kfence_pool)
> -               return -EINVAL;
> +               return kfence_init_late();
>
>         WRITE_ONCE(kfence_enabled, true);
>         queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
> --
> 2.27.0
>
> --
> You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20220303031505.28495-3-dtcccc%40linux.alibaba.com.
diff mbox series

Patch

diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 19eb123c0bba..ae69b2a113a4 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -93,7 +93,7 @@  static unsigned long kfence_skip_covered_thresh __read_mostly = 75;
 module_param_named(skip_covered_thresh, kfence_skip_covered_thresh, ulong, 0644);
 
 /* The pool of pages used for guard pages and objects. */
-char *__kfence_pool __ro_after_init;
+char *__kfence_pool __read_mostly;
 EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
 
 /*
@@ -534,17 +534,18 @@  static void rcu_guarded_free(struct rcu_head *h)
 	kfence_guarded_free((void *)meta->addr, meta, false);
 }
 
-static bool __init kfence_init_pool(void)
+/*
+ * The main part of init kfence pool.
+ * Return 0 if succeed. Otherwise return the address where error occurs.
+ */
+static unsigned long __kfence_init_pool(void)
 {
 	unsigned long addr = (unsigned long)__kfence_pool;
 	struct page *pages;
 	int i;
 
-	if (!__kfence_pool)
-		return false;
-
 	if (!arch_kfence_init_pool())
-		goto err;
+		return addr;
 
 	pages = virt_to_page(addr);
 
@@ -562,7 +563,7 @@  static bool __init kfence_init_pool(void)
 
 		/* Verify we do not have a compound head page. */
 		if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
-			goto err;
+			return addr;
 
 		__SetPageSlab(&pages[i]);
 	}
@@ -575,7 +576,7 @@  static bool __init kfence_init_pool(void)
 	 */
 	for (i = 0; i < 2; i++) {
 		if (unlikely(!kfence_protect(addr)))
-			goto err;
+			return addr;
 
 		addr += PAGE_SIZE;
 	}
@@ -592,7 +593,7 @@  static bool __init kfence_init_pool(void)
 
 		/* Protect the right redzone. */
 		if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
-			goto err;
+			return addr;
 
 		addr += 2 * PAGE_SIZE;
 	}
@@ -605,9 +606,21 @@  static bool __init kfence_init_pool(void)
 	 */
 	kmemleak_free(__kfence_pool);
 
-	return true;
+	return 0;
+}
+
+static bool __init kfence_init_pool(void)
+{
+	unsigned long addr;
+
+	if (!__kfence_pool)
+		return false;
+
+	addr = __kfence_init_pool();
+
+	if (!addr)
+		return true;
 
-err:
 	/*
 	 * Only release unprotected pages, and do not try to go back and change
 	 * page attributes due to risk of failing to do so as well. If changing
@@ -620,6 +633,22 @@  static bool __init kfence_init_pool(void)
 	return false;
 }
 
+static bool kfence_init_pool_late(void)
+{
+	unsigned long addr, free_pages;
+
+	addr = __kfence_init_pool();
+
+	if (!addr)
+		return true;
+
+	/* Same as above. */
+	free_pages = (KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)) / PAGE_SIZE;
+	free_contig_range(page_to_pfn(virt_to_page(addr)), free_pages);
+	__kfence_pool = NULL;
+	return false;
+}
+
 /* === DebugFS Interface ==================================================== */
 
 static int stats_show(struct seq_file *seq, void *v)
@@ -768,31 +797,58 @@  void __init kfence_alloc_pool(void)
 		pr_err("failed to allocate pool\n");
 }
 
+static inline void __kfence_init(void)
+{
+	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
+		static_branch_enable(&kfence_allocation_key);
+	WRITE_ONCE(kfence_enabled, true);
+	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
+	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
+		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
+}
+
 void __init kfence_init(void)
 {
+	stack_hash_seed = (u32)random_get_entropy();
+
 	/* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
 	if (!kfence_sample_interval)
 		return;
 
-	stack_hash_seed = (u32)random_get_entropy();
 	if (!kfence_init_pool()) {
 		pr_err("%s failed\n", __func__);
 		return;
 	}
 
-	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
-		static_branch_enable(&kfence_allocation_key);
-	WRITE_ONCE(kfence_enabled, true);
-	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
-	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
-		CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
-		(void *)(__kfence_pool + KFENCE_POOL_SIZE));
+	__kfence_init();
+}
+
+static int kfence_init_late(void)
+{
+	struct page *pages;
+	const unsigned long nr_pages = KFENCE_POOL_SIZE / PAGE_SIZE;
+
+	pages = alloc_contig_pages(nr_pages, GFP_KERNEL, first_online_node, NULL);
+
+	if (!pages)
+		return -ENOMEM;
+
+	__kfence_pool = page_to_virt(pages);
+
+	if (!kfence_init_pool_late()) {
+		pr_err("%s failed\n", __func__);
+		return -EBUSY;
+	}
+
+	__kfence_init();
+	return 0;
 }
 
 static int kfence_enable_late(void)
 {
 	if (!__kfence_pool)
-		return -EINVAL;
+		return kfence_init_late();
 
 	WRITE_ONCE(kfence_enabled, true);
 	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);