diff mbox series

[4/4,v2] LoongArch: Add KFENCE support

Message ID 20230725061451.1231480-5-lienze@kylinos.cn (mailing list archive)
State New
Headers show
Series Add KFENCE support for LoongArch | expand

Commit Message

Enze Li July 25, 2023, 6:14 a.m. UTC
The LoongArch architecture is quite different from other architectures.
When the allocating of KFENCE itself is done, it is mapped to the direct
mapping configuration window [1] by default on LoongArch.  It means that
it is not possible to use the page table mapped mode which required by
the KFENCE system and therefore it should be remapped to the appropriate
region.

This patch adds architecture specific implementation details for KFENCE.
In particular, this implements the required interface in <asm/kfence.h>.

Tested this patch by running the testcases and all passed.

[1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode

Signed-off-by: Enze Li <lienze@kylinos.cn>
---
 arch/loongarch/Kconfig               |  1 +
 arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
 arch/loongarch/include/asm/pgtable.h | 14 ++++++-
 arch/loongarch/mm/fault.c            | 22 ++++++----
 4 files changed, 90 insertions(+), 9 deletions(-)
 create mode 100644 arch/loongarch/include/asm/kfence.h

Comments

Huacai Chen July 25, 2023, 7:48 a.m. UTC | #1
Hi, Enze,

On Tue, Jul 25, 2023 at 2:15 PM Enze Li <lienze@kylinos.cn> wrote:
>
> The LoongArch architecture is quite different from other architectures.
> When the allocating of KFENCE itself is done, it is mapped to the direct
> mapping configuration window [1] by default on LoongArch.  It means that
> it is not possible to use the page table mapped mode which required by
> the KFENCE system and therefore it should be remapped to the appropriate
> region.
>
> This patch adds architecture specific implementation details for KFENCE.
> In particular, this implements the required interface in <asm/kfence.h>.
>
> Tested this patch by running the testcases and all passed.
>
> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
>
> Signed-off-by: Enze Li <lienze@kylinos.cn>
> ---
>  arch/loongarch/Kconfig               |  1 +
>  arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
>  arch/loongarch/include/asm/pgtable.h | 14 ++++++-
>  arch/loongarch/mm/fault.c            | 22 ++++++----
>  4 files changed, 90 insertions(+), 9 deletions(-)
>  create mode 100644 arch/loongarch/include/asm/kfence.h
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70635ea3d1e4..5b63b16be49e 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -91,6 +91,7 @@ config LOONGARCH
>         select HAVE_ARCH_AUDITSYSCALL
>         select HAVE_ARCH_JUMP_LABEL
>         select HAVE_ARCH_JUMP_LABEL_RELATIVE
> +       select HAVE_ARCH_KFENCE
>         select HAVE_ARCH_MMAP_RND_BITS if MMU
>         select HAVE_ARCH_SECCOMP_FILTER
>         select HAVE_ARCH_TRACEHOOK
> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
> new file mode 100644
> index 000000000000..fb39076fe4d7
> --- /dev/null
> +++ b/arch/loongarch/include/asm/kfence.h
> @@ -0,0 +1,62 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KFENCE support for LoongArch.
> + *
> + * Author: Enze Li <lienze@kylinos.cn>
> + * Copyright (C) 2022-2023 KylinSoft Corporation.
> + */
> +
> +#ifndef _ASM_LOONGARCH_KFENCE_H
> +#define _ASM_LOONGARCH_KFENCE_H
> +
> +#include <linux/kfence.h>
> +#include <asm/pgtable.h>
> +#include <asm/tlb.h>
> +
> +static inline bool arch_kfence_init_pool(void)
> +{
> +       char *kfence_pool = __kfence_pool;
> +       struct vm_struct *area;
> +       int err;
> +
> +       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
> +                                   KFENCE_AREA_START, KFENCE_AREA_END,
> +                                   __builtin_return_address(0));
> +       if (!area)
> +               return false;
> +
> +       __kfence_pool = (char *)area->addr;
> +       err = ioremap_page_range((unsigned long)__kfence_pool,
> +                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
> +                                virt_to_phys((void *)kfence_pool),
> +                                PAGE_KERNEL);
> +       if (err) {
> +               free_vm_area(area);
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
> +/* Protect the given page and flush TLB. */
> +static inline bool kfence_protect_page(unsigned long addr, bool protect)
> +{
> +       pte_t *pte = virt_to_kpte(addr);
> +
> +       if (WARN_ON(!pte) || pte_none(*pte))
> +               return false;
> +
> +       if (protect)
> +               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
> +       else
> +               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
> +
> +       /* Flush this CPU's TLB. */
This comment can be removed since the logic is obvious.

> +       preempt_disable();
> +       local_flush_tlb_one(addr);
> +       preempt_enable();
> +
> +       return true;
> +}
> +
> +#endif /* _ASM_LOONGARCH_KFENCE_H */
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index 98a0c98de9d1..2702a6ba7122 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -77,6 +77,13 @@ extern unsigned long zero_page_mask;
>         (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
>  #define __HAVE_COLOR_ZERO_PAGE
>
> +#ifdef CONFIG_KFENCE
> +#define KFENCE_AREA_SIZE \
> +       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
Needn't change to a new line.

Others look good to me.

Huacai

> +#else
> +#define KFENCE_AREA_SIZE       0
> +#endif
> +
>  /*
>   * TLB refill handlers may also map the vmalloc area into xkvrange.
>   * Avoid the first couple of pages so NULL pointer dereferences will
> @@ -88,11 +95,16 @@ extern unsigned long zero_page_mask;
>  #define VMALLOC_START  MODULES_END
>  #define VMALLOC_END    \
>         (vm_map_base +  \
> -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
> +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
>
>  #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
>  #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
>
> +#ifdef CONFIG_KFENCE
> +#define KFENCE_AREA_START      VMEMMAP_END
> +#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE)
> +#endif
> +
>  #define pte_ERROR(e) \
>         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
>  #ifndef __PAGETABLE_PMD_FOLDED
> diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
> index da5b6d518cdb..c0319128b221 100644
> --- a/arch/loongarch/mm/fault.c
> +++ b/arch/loongarch/mm/fault.c
> @@ -23,6 +23,7 @@
>  #include <linux/kprobes.h>
>  #include <linux/perf_event.h>
>  #include <linux/uaccess.h>
> +#include <linux/kfence.h>
>
>  #include <asm/branch.h>
>  #include <asm/mmu_context.h>
> @@ -30,7 +31,8 @@
>
>  int show_unhandled_signals = 1;
>
> -static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
> +static void __kprobes no_context(struct pt_regs *regs, unsigned long address,
> +                                unsigned long write)
>  {
>         const int field = sizeof(unsigned long) * 2;
>
> @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>         if (fixup_exception(regs))
>                 return;
>
> +       if (kfence_handle_page_fault(address, write, regs))
> +               return;
> +
>         /*
>          * Oops. The kernel tried to access some bad page. We'll have to
>          * terminate things with extreme prejudice.
> @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>         die("Oops", regs);
>  }
>
> -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
> +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address,
> +                                      unsigned long write)
>  {
>         /*
>          * We ran out of memory, call the OOM killer, and return the userspace
>          * (which will retry the fault, or kill us if we got oom-killed).
>          */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>         pagefault_out_of_memory();
> @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
>  {
>         /* Kernel mode? Handle exceptions or die */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>
> @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
>
>         /* Kernel mode? Handle exceptions or die */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>
> @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>          */
>         if (address & __UA_LIMIT) {
>                 if (!user_mode(regs))
> -                       no_context(regs, address);
> +                       no_context(regs, address, write);
>                 else
>                         do_sigsegv(regs, write, address, si_code);
>                 return;
> @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>
>         if (fault_signal_pending(fault, regs)) {
>                 if (!user_mode(regs))
> -                       no_context(regs, address);
> +                       no_context(regs, address, write);
>                 return;
>         }
>
> @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>         if (unlikely(fault & VM_FAULT_ERROR)) {
>                 mmap_read_unlock(mm);
>                 if (fault & VM_FAULT_OOM) {
> -                       do_out_of_memory(regs, address);
> +                       do_out_of_memory(regs, address, write);
>                         return;
>                 } else if (fault & VM_FAULT_SIGSEGV) {
>                         do_sigsegv(regs, write, address, si_code);
> --
> 2.34.1
>
>
Jackie Liu July 25, 2023, 2:34 p.m. UTC | #2
在 2023/7/25 14:14, Enze Li 写道:
> The LoongArch architecture is quite different from other architectures.
> When the allocating of KFENCE itself is done, it is mapped to the direct
> mapping configuration window [1] by default on LoongArch.  It means that
> it is not possible to use the page table mapped mode which required by
> the KFENCE system and therefore it should be remapped to the appropriate
> region.
>
> This patch adds architecture specific implementation details for KFENCE.
> In particular, this implements the required interface in <asm/kfence.h>.
>
> Tested this patch by running the testcases and all passed.
>
> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
>
> Signed-off-by: Enze Li <lienze@kylinos.cn>
> ---
>   arch/loongarch/Kconfig               |  1 +
>   arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
>   arch/loongarch/include/asm/pgtable.h | 14 ++++++-
>   arch/loongarch/mm/fault.c            | 22 ++++++----
>   4 files changed, 90 insertions(+), 9 deletions(-)
>   create mode 100644 arch/loongarch/include/asm/kfence.h
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70635ea3d1e4..5b63b16be49e 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -91,6 +91,7 @@ config LOONGARCH
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_JUMP_LABEL
>   	select HAVE_ARCH_JUMP_LABEL_RELATIVE
> +	select HAVE_ARCH_KFENCE
>   	select HAVE_ARCH_MMAP_RND_BITS if MMU
>   	select HAVE_ARCH_SECCOMP_FILTER
>   	select HAVE_ARCH_TRACEHOOK
> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
> new file mode 100644
> index 000000000000..fb39076fe4d7
> --- /dev/null
> +++ b/arch/loongarch/include/asm/kfence.h
> @@ -0,0 +1,62 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KFENCE support for LoongArch.
> + *
> + * Author: Enze Li <lienze@kylinos.cn>
> + * Copyright (C) 2022-2023 KylinSoft Corporation.
> + */
> +
> +#ifndef _ASM_LOONGARCH_KFENCE_H
> +#define _ASM_LOONGARCH_KFENCE_H
> +
> +#include <linux/kfence.h>
> +#include <asm/pgtable.h>
> +#include <asm/tlb.h>
> +
> +static inline bool arch_kfence_init_pool(void)
> +{
> +	char *kfence_pool = __kfence_pool;
> +	struct vm_struct *area;
> +	int err;
> +
> +	area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
> +				    KFENCE_AREA_START, KFENCE_AREA_END,
> +				    __builtin_return_address(0));
> +	if (!area)
> +		return false;
> +
> +	__kfence_pool = (char *)area->addr;

I think there should be something wrong here.

> +	err = ioremap_page_range((unsigned long)__kfence_pool,
> +				 (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
> +				 virt_to_phys((void *)kfence_pool),
> +				 PAGE_KERNEL);
> +	if (err) {
> +		free_vm_area(area);

If err > 0, return area->addr here, It's not correct.
Huacai Chen July 27, 2023, 1:26 a.m. UTC | #3
On Tue, Jul 25, 2023 at 2:15 PM Enze Li <lienze@kylinos.cn> wrote:
>
> The LoongArch architecture is quite different from other architectures.
> When the allocating of KFENCE itself is done, it is mapped to the direct
> mapping configuration window [1] by default on LoongArch.  It means that
> it is not possible to use the page table mapped mode which required by
> the KFENCE system and therefore it should be remapped to the appropriate
> region.
>
> This patch adds architecture specific implementation details for KFENCE.
> In particular, this implements the required interface in <asm/kfence.h>.
>
> Tested this patch by running the testcases and all passed.
>
> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
>
> Signed-off-by: Enze Li <lienze@kylinos.cn>
> ---
>  arch/loongarch/Kconfig               |  1 +
>  arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
>  arch/loongarch/include/asm/pgtable.h | 14 ++++++-
>  arch/loongarch/mm/fault.c            | 22 ++++++----
>  4 files changed, 90 insertions(+), 9 deletions(-)
>  create mode 100644 arch/loongarch/include/asm/kfence.h
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70635ea3d1e4..5b63b16be49e 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -91,6 +91,7 @@ config LOONGARCH
>         select HAVE_ARCH_AUDITSYSCALL
>         select HAVE_ARCH_JUMP_LABEL
>         select HAVE_ARCH_JUMP_LABEL_RELATIVE
> +       select HAVE_ARCH_KFENCE
>         select HAVE_ARCH_MMAP_RND_BITS if MMU
>         select HAVE_ARCH_SECCOMP_FILTER
>         select HAVE_ARCH_TRACEHOOK
> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
> new file mode 100644
> index 000000000000..fb39076fe4d7
> --- /dev/null
> +++ b/arch/loongarch/include/asm/kfence.h
> @@ -0,0 +1,62 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * KFENCE support for LoongArch.
> + *
> + * Author: Enze Li <lienze@kylinos.cn>
> + * Copyright (C) 2022-2023 KylinSoft Corporation.
> + */
> +
> +#ifndef _ASM_LOONGARCH_KFENCE_H
> +#define _ASM_LOONGARCH_KFENCE_H
> +
> +#include <linux/kfence.h>
> +#include <asm/pgtable.h>
> +#include <asm/tlb.h>
> +
> +static inline bool arch_kfence_init_pool(void)
> +{
> +       char *kfence_pool = __kfence_pool;
> +       struct vm_struct *area;
> +       int err;
> +
> +       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
> +                                   KFENCE_AREA_START, KFENCE_AREA_END,
> +                                   __builtin_return_address(0));
> +       if (!area)
> +               return false;
> +
> +       __kfence_pool = (char *)area->addr;
> +       err = ioremap_page_range((unsigned long)__kfence_pool,
> +                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
> +                                virt_to_phys((void *)kfence_pool),
> +                                PAGE_KERNEL);
> +       if (err) {
> +               free_vm_area(area);
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
> +/* Protect the given page and flush TLB. */
> +static inline bool kfence_protect_page(unsigned long addr, bool protect)
> +{
> +       pte_t *pte = virt_to_kpte(addr);
> +
> +       if (WARN_ON(!pte) || pte_none(*pte))
> +               return false;
> +
> +       if (protect)
> +               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
> +       else
> +               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
> +
> +       /* Flush this CPU's TLB. */
> +       preempt_disable();
> +       local_flush_tlb_one(addr);
> +       preempt_enable();
> +
> +       return true;
> +}
> +
> +#endif /* _ASM_LOONGARCH_KFENCE_H */
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index 98a0c98de9d1..2702a6ba7122 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -77,6 +77,13 @@ extern unsigned long zero_page_mask;
>         (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
>  #define __HAVE_COLOR_ZERO_PAGE
>
> +#ifdef CONFIG_KFENCE
> +#define KFENCE_AREA_SIZE \
> +       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
Another question: Why define KFENCE_AREA_SIZE while there is already
KFENCE_POOL_SIZE? And why is KFENCE_AREA_SIZE a little larger than
KFENCE_POOL_SIZE? If we can reuse KFENCE_POOL_SIZE,
KFENCE_AREA_START/KFENCE_AREA_END can be renamed to
KFENCE_POOL_START/KFENCE_POOL_END.

Huacai

> +#else
> +#define KFENCE_AREA_SIZE       0
> +#endif
> +
>  /*
>   * TLB refill handlers may also map the vmalloc area into xkvrange.
>   * Avoid the first couple of pages so NULL pointer dereferences will
> @@ -88,11 +95,16 @@ extern unsigned long zero_page_mask;
>  #define VMALLOC_START  MODULES_END
>  #define VMALLOC_END    \
>         (vm_map_base +  \
> -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
> +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
>
>  #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
>  #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
>
> +#ifdef CONFIG_KFENCE
> +#define KFENCE_AREA_START      VMEMMAP_END
> +#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE)
> +#endif
> +
>  #define pte_ERROR(e) \
>         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
>  #ifndef __PAGETABLE_PMD_FOLDED
> diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
> index da5b6d518cdb..c0319128b221 100644
> --- a/arch/loongarch/mm/fault.c
> +++ b/arch/loongarch/mm/fault.c
> @@ -23,6 +23,7 @@
>  #include <linux/kprobes.h>
>  #include <linux/perf_event.h>
>  #include <linux/uaccess.h>
> +#include <linux/kfence.h>
>
>  #include <asm/branch.h>
>  #include <asm/mmu_context.h>
> @@ -30,7 +31,8 @@
>
>  int show_unhandled_signals = 1;
>
> -static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
> +static void __kprobes no_context(struct pt_regs *regs, unsigned long address,
> +                                unsigned long write)
>  {
>         const int field = sizeof(unsigned long) * 2;
>
> @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>         if (fixup_exception(regs))
>                 return;
>
> +       if (kfence_handle_page_fault(address, write, regs))
> +               return;
> +
>         /*
>          * Oops. The kernel tried to access some bad page. We'll have to
>          * terminate things with extreme prejudice.
> @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>         die("Oops", regs);
>  }
>
> -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
> +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address,
> +                                      unsigned long write)
>  {
>         /*
>          * We ran out of memory, call the OOM killer, and return the userspace
>          * (which will retry the fault, or kill us if we got oom-killed).
>          */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>         pagefault_out_of_memory();
> @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
>  {
>         /* Kernel mode? Handle exceptions or die */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>
> @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
>
>         /* Kernel mode? Handle exceptions or die */
>         if (!user_mode(regs)) {
> -               no_context(regs, address);
> +               no_context(regs, address, write);
>                 return;
>         }
>
> @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>          */
>         if (address & __UA_LIMIT) {
>                 if (!user_mode(regs))
> -                       no_context(regs, address);
> +                       no_context(regs, address, write);
>                 else
>                         do_sigsegv(regs, write, address, si_code);
>                 return;
> @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>
>         if (fault_signal_pending(fault, regs)) {
>                 if (!user_mode(regs))
> -                       no_context(regs, address);
> +                       no_context(regs, address, write);
>                 return;
>         }
>
> @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>         if (unlikely(fault & VM_FAULT_ERROR)) {
>                 mmap_read_unlock(mm);
>                 if (fault & VM_FAULT_OOM) {
> -                       do_out_of_memory(regs, address);
> +                       do_out_of_memory(regs, address, write);
>                         return;
>                 } else if (fault & VM_FAULT_SIGSEGV) {
>                         do_sigsegv(regs, write, address, si_code);
> --
> 2.34.1
>
>
Enze Li July 28, 2023, 3:27 a.m. UTC | #4
On Thu, Jul 27 2023 at 09:26:04 AM +0800, Huacai Chen wrote:

> On Tue, Jul 25, 2023 at 2:15 PM Enze Li <lienze@kylinos.cn> wrote:
>>
>> The LoongArch architecture is quite different from other architectures.
>> When the allocating of KFENCE itself is done, it is mapped to the direct
>> mapping configuration window [1] by default on LoongArch.  It means that
>> it is not possible to use the page table mapped mode which required by
>> the KFENCE system and therefore it should be remapped to the appropriate
>> region.
>>
>> This patch adds architecture specific implementation details for KFENCE.
>> In particular, this implements the required interface in <asm/kfence.h>.
>>
>> Tested this patch by running the testcases and all passed.
>>
>> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
>>
>> Signed-off-by: Enze Li <lienze@kylinos.cn>
>> ---
>>  arch/loongarch/Kconfig               |  1 +
>>  arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
>>  arch/loongarch/include/asm/pgtable.h | 14 ++++++-
>>  arch/loongarch/mm/fault.c            | 22 ++++++----
>>  4 files changed, 90 insertions(+), 9 deletions(-)
>>  create mode 100644 arch/loongarch/include/asm/kfence.h
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 70635ea3d1e4..5b63b16be49e 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -91,6 +91,7 @@ config LOONGARCH
>>         select HAVE_ARCH_AUDITSYSCALL
>>         select HAVE_ARCH_JUMP_LABEL
>>         select HAVE_ARCH_JUMP_LABEL_RELATIVE
>> +       select HAVE_ARCH_KFENCE
>>         select HAVE_ARCH_MMAP_RND_BITS if MMU
>>         select HAVE_ARCH_SECCOMP_FILTER
>>         select HAVE_ARCH_TRACEHOOK
>> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
>> new file mode 100644
>> index 000000000000..fb39076fe4d7
>> --- /dev/null
>> +++ b/arch/loongarch/include/asm/kfence.h
>> @@ -0,0 +1,62 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * KFENCE support for LoongArch.
>> + *
>> + * Author: Enze Li <lienze@kylinos.cn>
>> + * Copyright (C) 2022-2023 KylinSoft Corporation.
>> + */
>> +
>> +#ifndef _ASM_LOONGARCH_KFENCE_H
>> +#define _ASM_LOONGARCH_KFENCE_H
>> +
>> +#include <linux/kfence.h>
>> +#include <asm/pgtable.h>
>> +#include <asm/tlb.h>
>> +
>> +static inline bool arch_kfence_init_pool(void)
>> +{
>> +       char *kfence_pool = __kfence_pool;
>> +       struct vm_struct *area;
>> +       int err;
>> +
>> +       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
>> +                                   KFENCE_AREA_START, KFENCE_AREA_END,
>> +                                   __builtin_return_address(0));
>> +       if (!area)
>> +               return false;
>> +
>> +       __kfence_pool = (char *)area->addr;
>> +       err = ioremap_page_range((unsigned long)__kfence_pool,
>> +                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
>> +                                virt_to_phys((void *)kfence_pool),
>> +                                PAGE_KERNEL);
>> +       if (err) {
>> +               free_vm_area(area);
>> +               return false;
>> +       }
>> +
>> +       return true;
>> +}
>> +
>> +/* Protect the given page and flush TLB. */
>> +static inline bool kfence_protect_page(unsigned long addr, bool protect)
>> +{
>> +       pte_t *pte = virt_to_kpte(addr);
>> +
>> +       if (WARN_ON(!pte) || pte_none(*pte))
>> +               return false;
>> +
>> +       if (protect)
>> +               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
>> +       else
>> +               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
>> +
>> +       /* Flush this CPU's TLB. */
>> +       preempt_disable();
>> +       local_flush_tlb_one(addr);
>> +       preempt_enable();
>> +
>> +       return true;
>> +}
>> +
>> +#endif /* _ASM_LOONGARCH_KFENCE_H */
>> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
>> index 98a0c98de9d1..2702a6ba7122 100644
>> --- a/arch/loongarch/include/asm/pgtable.h
>> +++ b/arch/loongarch/include/asm/pgtable.h
>> @@ -77,6 +77,13 @@ extern unsigned long zero_page_mask;
>>         (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
>>  #define __HAVE_COLOR_ZERO_PAGE
>>
>> +#ifdef CONFIG_KFENCE
>> +#define KFENCE_AREA_SIZE \
>> +       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)

Hi Huacai,

> Another question: Why define KFENCE_AREA_SIZE while there is already
> KFENCE_POOL_SIZE?

The KFENCE_POOL_SIZE macro is defined in linux/kfence.h.  When I trying
to include this header file, I see the following error,

----------------------------------------------------------------------
  CC      arch/loongarch/kernel/asm-offsets.s
In file included from ./arch/loongarch/include/asm/pgtable.h:64,
                 from ./include/linux/pgtable.h:6,
                 from ./include/linux/mm.h:29,
                 from arch/loongarch/kernel/asm-offsets.c:9:
./include/linux/kfence.h:93:35: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
   93 | void kfence_shutdown_cache(struct kmem_cache *s);
      |                                   ^~~~~~~~~~
./include/linux/kfence.h:99:29: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
   99 | void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
      |                             ^~~~~~~~~~
./include/linux/kfence.h:117:50: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
  117 | static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
      |                                                  ^~~~~~~~~~
./include/linux/kfence.h: In function ‘kfence_alloc’:
./include/linux/kfence.h:128:31: error: passing argument 1 of ‘__kfence_alloc’ from incompatible pointer type [-Werror=incompatible-pointer-types]
  128 |         return __kfence_alloc(s, size, flags);
      |                               ^
      |                               |
      |                               struct kmem_cache *
--------------------------------------------------------------------

The root cause of this issue is that linux/kfence.h should be expanded
after linux/mm.h, not before.  That said, we can not put any
"high-level" header files in the "low-level" ones.

> And why is KFENCE_AREA_SIZE a little larger than
> KFENCE_POOL_SIZE? If we can reuse KFENCE_POOL_SIZE,
> KFENCE_AREA_START/KFENCE_AREA_END can be renamed to
> KFENCE_POOL_START/KFENCE_POOL_END.

+#define KFENCE_AREA_SIZE \
+       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
                                              ^^^^^
                                              
Here I've added two extra pages, that's due to working with
__get_vm_area_caller() to request the space correctly.

1. arch_kfence_init_pool
     __get_vm_area_caller
       __get_vm_area_node
         =================================
           if (!(flags & VM_NO_GUARD))
                   size += PAGE_SIZE;
         =================================

If we do not set VM_NO_GUARD, we would get one more page as "GUARD".
Setting VM_NO_GUARD is dangerous behavior and I suggest we keep this
page.

2. arch_kfence_init_pool
     __get_vm_area_caller
       __get_vm_area_node                        !!!This is my comment--
           =======================================                     |
           if (flags & VM_IOREMAP)                                     |
                   align = 1ul << clamp_t(int, ...                     |
           *** We got "align==0x200000" here.  Based on the default  <--
               KFENCE objects of 255, we got the maximum align here. ***
           =======================================

           alloc_vmap_area
             __alloc_vmap_area
               =================================
               nva_start_addr = ALIGN(vstart, align);
               *** When running here, the starting address will be
                   moved forward one byte due to alignment
                   requirements.  If we do not give enough space, we'll
                   fail on the next line. ***
               
               if (nva_start_addr + size > vend)
                       return vend;
               =================================
               
Theoretically, this alignment requires at most 2MB of space.  However,
considering that the starting address is fixed (the starting position is
determined by VMEMMAP_END), I think that adding another page will be
enough.

Best Regards,
Enze

>> +#else
>> +#define KFENCE_AREA_SIZE       0
>> +#endif
>> +
>>  /*
>>   * TLB refill handlers may also map the vmalloc area into xkvrange.
>>   * Avoid the first couple of pages so NULL pointer dereferences will
>> @@ -88,11 +95,16 @@ extern unsigned long zero_page_mask;
>>  #define VMALLOC_START  MODULES_END
>>  #define VMALLOC_END    \
>>         (vm_map_base +  \
>> -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
>> +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
>>
>>  #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
>>  #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
>>
>> +#ifdef CONFIG_KFENCE
>> +#define KFENCE_AREA_START      VMEMMAP_END
>> +#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE)
>> +#endif
>> +
>>  #define pte_ERROR(e) \
>>         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
>>  #ifndef __PAGETABLE_PMD_FOLDED
>> diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
>> index da5b6d518cdb..c0319128b221 100644
>> --- a/arch/loongarch/mm/fault.c
>> +++ b/arch/loongarch/mm/fault.c
>> @@ -23,6 +23,7 @@
>>  #include <linux/kprobes.h>
>>  #include <linux/perf_event.h>
>>  #include <linux/uaccess.h>
>> +#include <linux/kfence.h>
>>
>>  #include <asm/branch.h>
>>  #include <asm/mmu_context.h>
>> @@ -30,7 +31,8 @@
>>
>>  int show_unhandled_signals = 1;
>>
>> -static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>> +static void __kprobes no_context(struct pt_regs *regs, unsigned long address,
>> +                                unsigned long write)
>>  {
>>         const int field = sizeof(unsigned long) * 2;
>>
>> @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>>         if (fixup_exception(regs))
>>                 return;
>>
>> +       if (kfence_handle_page_fault(address, write, regs))
>> +               return;
>> +
>>         /*
>>          * Oops. The kernel tried to access some bad page. We'll have to
>>          * terminate things with extreme prejudice.
>> @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
>>         die("Oops", regs);
>>  }
>>
>> -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
>> +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address,
>> +                                      unsigned long write)
>>  {
>>         /*
>>          * We ran out of memory, call the OOM killer, and return the userspace
>>          * (which will retry the fault, or kill us if we got oom-killed).
>>          */
>>         if (!user_mode(regs)) {
>> -               no_context(regs, address);
>> +               no_context(regs, address, write);
>>                 return;
>>         }
>>         pagefault_out_of_memory();
>> @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
>>  {
>>         /* Kernel mode? Handle exceptions or die */
>>         if (!user_mode(regs)) {
>> -               no_context(regs, address);
>> +               no_context(regs, address, write);
>>                 return;
>>         }
>>
>> @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
>>
>>         /* Kernel mode? Handle exceptions or die */
>>         if (!user_mode(regs)) {
>> -               no_context(regs, address);
>> +               no_context(regs, address, write);
>>                 return;
>>         }
>>
>> @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>>          */
>>         if (address & __UA_LIMIT) {
>>                 if (!user_mode(regs))
>> -                       no_context(regs, address);
>> +                       no_context(regs, address, write);
>>                 else
>>                         do_sigsegv(regs, write, address, si_code);
>>                 return;
>> @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>>
>>         if (fault_signal_pending(fault, regs)) {
>>                 if (!user_mode(regs))
>> -                       no_context(regs, address);
>> +                       no_context(regs, address, write);
>>                 return;
>>         }
>>
>> @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
>>         if (unlikely(fault & VM_FAULT_ERROR)) {
>>                 mmap_read_unlock(mm);
>>                 if (fault & VM_FAULT_OOM) {
>> -                       do_out_of_memory(regs, address);
>> +                       do_out_of_memory(regs, address, write);
>>                         return;
>>                 } else if (fault & VM_FAULT_SIGSEGV) {
>>                         do_sigsegv(regs, write, address, si_code);
>> --
>> 2.34.1
>>
>>
Huacai Chen July 28, 2023, 4:33 a.m. UTC | #5
On Fri, Jul 28, 2023 at 11:28 AM Enze Li <lienze@kylinos.cn> wrote:
>
> On Thu, Jul 27 2023 at 09:26:04 AM +0800, Huacai Chen wrote:
>
> > On Tue, Jul 25, 2023 at 2:15 PM Enze Li <lienze@kylinos.cn> wrote:
> >>
> >> The LoongArch architecture is quite different from other architectures.
> >> When the allocating of KFENCE itself is done, it is mapped to the direct
> >> mapping configuration window [1] by default on LoongArch.  It means that
> >> it is not possible to use the page table mapped mode which required by
> >> the KFENCE system and therefore it should be remapped to the appropriate
> >> region.
> >>
> >> This patch adds architecture specific implementation details for KFENCE.
> >> In particular, this implements the required interface in <asm/kfence.h>.
> >>
> >> Tested this patch by running the testcases and all passed.
> >>
> >> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
> >>
> >> Signed-off-by: Enze Li <lienze@kylinos.cn>
> >> ---
> >>  arch/loongarch/Kconfig               |  1 +
> >>  arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
> >>  arch/loongarch/include/asm/pgtable.h | 14 ++++++-
> >>  arch/loongarch/mm/fault.c            | 22 ++++++----
> >>  4 files changed, 90 insertions(+), 9 deletions(-)
> >>  create mode 100644 arch/loongarch/include/asm/kfence.h
> >>
> >> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> >> index 70635ea3d1e4..5b63b16be49e 100644
> >> --- a/arch/loongarch/Kconfig
> >> +++ b/arch/loongarch/Kconfig
> >> @@ -91,6 +91,7 @@ config LOONGARCH
> >>         select HAVE_ARCH_AUDITSYSCALL
> >>         select HAVE_ARCH_JUMP_LABEL
> >>         select HAVE_ARCH_JUMP_LABEL_RELATIVE
> >> +       select HAVE_ARCH_KFENCE
> >>         select HAVE_ARCH_MMAP_RND_BITS if MMU
> >>         select HAVE_ARCH_SECCOMP_FILTER
> >>         select HAVE_ARCH_TRACEHOOK
> >> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
> >> new file mode 100644
> >> index 000000000000..fb39076fe4d7
> >> --- /dev/null
> >> +++ b/arch/loongarch/include/asm/kfence.h
> >> @@ -0,0 +1,62 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * KFENCE support for LoongArch.
> >> + *
> >> + * Author: Enze Li <lienze@kylinos.cn>
> >> + * Copyright (C) 2022-2023 KylinSoft Corporation.
> >> + */
> >> +
> >> +#ifndef _ASM_LOONGARCH_KFENCE_H
> >> +#define _ASM_LOONGARCH_KFENCE_H
> >> +
> >> +#include <linux/kfence.h>
> >> +#include <asm/pgtable.h>
> >> +#include <asm/tlb.h>
> >> +
> >> +static inline bool arch_kfence_init_pool(void)
> >> +{
> >> +       char *kfence_pool = __kfence_pool;
> >> +       struct vm_struct *area;
> >> +       int err;
> >> +
> >> +       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
> >> +                                   KFENCE_AREA_START, KFENCE_AREA_END,
> >> +                                   __builtin_return_address(0));
> >> +       if (!area)
> >> +               return false;
> >> +
> >> +       __kfence_pool = (char *)area->addr;
> >> +       err = ioremap_page_range((unsigned long)__kfence_pool,
> >> +                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
> >> +                                virt_to_phys((void *)kfence_pool),
> >> +                                PAGE_KERNEL);
> >> +       if (err) {
> >> +               free_vm_area(area);
> >> +               return false;
> >> +       }
> >> +
> >> +       return true;
> >> +}
> >> +
> >> +/* Protect the given page and flush TLB. */
> >> +static inline bool kfence_protect_page(unsigned long addr, bool protect)
> >> +{
> >> +       pte_t *pte = virt_to_kpte(addr);
> >> +
> >> +       if (WARN_ON(!pte) || pte_none(*pte))
> >> +               return false;
> >> +
> >> +       if (protect)
> >> +               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
> >> +       else
> >> +               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
> >> +
> >> +       /* Flush this CPU's TLB. */
> >> +       preempt_disable();
> >> +       local_flush_tlb_one(addr);
> >> +       preempt_enable();
> >> +
> >> +       return true;
> >> +}
> >> +
> >> +#endif /* _ASM_LOONGARCH_KFENCE_H */
> >> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> >> index 98a0c98de9d1..2702a6ba7122 100644
> >> --- a/arch/loongarch/include/asm/pgtable.h
> >> +++ b/arch/loongarch/include/asm/pgtable.h
> >> @@ -77,6 +77,13 @@ extern unsigned long zero_page_mask;
> >>         (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
> >>  #define __HAVE_COLOR_ZERO_PAGE
> >>
> >> +#ifdef CONFIG_KFENCE
> >> +#define KFENCE_AREA_SIZE \
> >> +       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
>
> Hi Huacai,
>
> > Another question: Why define KFENCE_AREA_SIZE while there is already
> > KFENCE_POOL_SIZE?
>
> The KFENCE_POOL_SIZE macro is defined in linux/kfence.h.  When I trying
> to include this header file, I see the following error,
>
> ----------------------------------------------------------------------
>   CC      arch/loongarch/kernel/asm-offsets.s
> In file included from ./arch/loongarch/include/asm/pgtable.h:64,
>                  from ./include/linux/pgtable.h:6,
>                  from ./include/linux/mm.h:29,
>                  from arch/loongarch/kernel/asm-offsets.c:9:
> ./include/linux/kfence.h:93:35: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
>    93 | void kfence_shutdown_cache(struct kmem_cache *s);
>       |                                   ^~~~~~~~~~
> ./include/linux/kfence.h:99:29: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
>    99 | void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
>       |                             ^~~~~~~~~~
> ./include/linux/kfence.h:117:50: warning: ‘struct kmem_cache’ declared inside parameter list will not be visible outside of this definition or declaration
>   117 | static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
>       |                                                  ^~~~~~~~~~
> ./include/linux/kfence.h: In function ‘kfence_alloc’:
> ./include/linux/kfence.h:128:31: error: passing argument 1 of ‘__kfence_alloc’ from incompatible pointer type [-Werror=incompatible-pointer-types]
>   128 |         return __kfence_alloc(s, size, flags);
>       |                               ^
>       |                               |
>       |                               struct kmem_cache *
> --------------------------------------------------------------------
>
> The root cause of this issue is that linux/kfence.h should be expanded
> after linux/mm.h, not before.  That said, we can not put any
> "high-level" header files in the "low-level" ones.
>
> > And why is KFENCE_AREA_SIZE a little larger than
> > KFENCE_POOL_SIZE? If we can reuse KFENCE_POOL_SIZE,
> > KFENCE_AREA_START/KFENCE_AREA_END can be renamed to
> > KFENCE_POOL_START/KFENCE_POOL_END.
>
> +#define KFENCE_AREA_SIZE \
> +       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
>                                               ^^^^^
>
> Here I've added two extra pages, that's due to working with
> __get_vm_area_caller() to request the space correctly.
>
> 1. arch_kfence_init_pool
>      __get_vm_area_caller
>        __get_vm_area_node
>          =================================
>            if (!(flags & VM_NO_GUARD))
>                    size += PAGE_SIZE;
>          =================================
>
> If we do not set VM_NO_GUARD, we would get one more page as "GUARD".
> Setting VM_NO_GUARD is dangerous behavior and I suggest we keep this
> page.
>
> 2. arch_kfence_init_pool
>      __get_vm_area_caller
>        __get_vm_area_node                        !!!This is my comment--
>            =======================================                     |
>            if (flags & VM_IOREMAP)                                     |
>                    align = 1ul << clamp_t(int, ...                     |
>            *** We got "align==0x200000" here.  Based on the default  <--
>                KFENCE objects of 255, we got the maximum align here. ***
>            =======================================
>
>            alloc_vmap_area
>              __alloc_vmap_area
>                =================================
>                nva_start_addr = ALIGN(vstart, align);
>                *** When running here, the starting address will be
>                    moved forward one byte due to alignment
>                    requirements.  If we do not give enough space, we'll
>                    fail on the next line. ***
>
>                if (nva_start_addr + size > vend)
>                        return vend;
>                =================================
>
> Theoretically, this alignment requires at most 2MB of space.  However,
> considering that the starting address is fixed (the starting position is
> determined by VMEMMAP_END), I think that adding another page will be
> enough.
OK, that makes sense.

Huacai
>
> Best Regards,
> Enze
>
> >> +#else
> >> +#define KFENCE_AREA_SIZE       0
> >> +#endif
> >> +
> >>  /*
> >>   * TLB refill handlers may also map the vmalloc area into xkvrange.
> >>   * Avoid the first couple of pages so NULL pointer dereferences will
> >> @@ -88,11 +95,16 @@ extern unsigned long zero_page_mask;
> >>  #define VMALLOC_START  MODULES_END
> >>  #define VMALLOC_END    \
> >>         (vm_map_base +  \
> >> -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
> >> +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
> >>
> >>  #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
> >>  #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
> >>
> >> +#ifdef CONFIG_KFENCE
> >> +#define KFENCE_AREA_START      VMEMMAP_END
> >> +#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE)
> >> +#endif
> >> +
> >>  #define pte_ERROR(e) \
> >>         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
> >>  #ifndef __PAGETABLE_PMD_FOLDED
> >> diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
> >> index da5b6d518cdb..c0319128b221 100644
> >> --- a/arch/loongarch/mm/fault.c
> >> +++ b/arch/loongarch/mm/fault.c
> >> @@ -23,6 +23,7 @@
> >>  #include <linux/kprobes.h>
> >>  #include <linux/perf_event.h>
> >>  #include <linux/uaccess.h>
> >> +#include <linux/kfence.h>
> >>
> >>  #include <asm/branch.h>
> >>  #include <asm/mmu_context.h>
> >> @@ -30,7 +31,8 @@
> >>
> >>  int show_unhandled_signals = 1;
> >>
> >> -static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
> >> +static void __kprobes no_context(struct pt_regs *regs, unsigned long address,
> >> +                                unsigned long write)
> >>  {
> >>         const int field = sizeof(unsigned long) * 2;
> >>
> >> @@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
> >>         if (fixup_exception(regs))
> >>                 return;
> >>
> >> +       if (kfence_handle_page_fault(address, write, regs))
> >> +               return;
> >> +
> >>         /*
> >>          * Oops. The kernel tried to access some bad page. We'll have to
> >>          * terminate things with extreme prejudice.
> >> @@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
> >>         die("Oops", regs);
> >>  }
> >>
> >> -static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
> >> +static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address,
> >> +                                      unsigned long write)
> >>  {
> >>         /*
> >>          * We ran out of memory, call the OOM killer, and return the userspace
> >>          * (which will retry the fault, or kill us if we got oom-killed).
> >>          */
> >>         if (!user_mode(regs)) {
> >> -               no_context(regs, address);
> >> +               no_context(regs, address, write);
> >>                 return;
> >>         }
> >>         pagefault_out_of_memory();
> >> @@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
> >>  {
> >>         /* Kernel mode? Handle exceptions or die */
> >>         if (!user_mode(regs)) {
> >> -               no_context(regs, address);
> >> +               no_context(regs, address, write);
> >>                 return;
> >>         }
> >>
> >> @@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
> >>
> >>         /* Kernel mode? Handle exceptions or die */
> >>         if (!user_mode(regs)) {
> >> -               no_context(regs, address);
> >> +               no_context(regs, address, write);
> >>                 return;
> >>         }
> >>
> >> @@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
> >>          */
> >>         if (address & __UA_LIMIT) {
> >>                 if (!user_mode(regs))
> >> -                       no_context(regs, address);
> >> +                       no_context(regs, address, write);
> >>                 else
> >>                         do_sigsegv(regs, write, address, si_code);
> >>                 return;
> >> @@ -211,7 +217,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
> >>
> >>         if (fault_signal_pending(fault, regs)) {
> >>                 if (!user_mode(regs))
> >> -                       no_context(regs, address);
> >> +                       no_context(regs, address, write);
> >>                 return;
> >>         }
> >>
> >> @@ -232,7 +238,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
> >>         if (unlikely(fault & VM_FAULT_ERROR)) {
> >>                 mmap_read_unlock(mm);
> >>                 if (fault & VM_FAULT_OOM) {
> >> -                       do_out_of_memory(regs, address);
> >> +                       do_out_of_memory(regs, address, write);
> >>                         return;
> >>                 } else if (fault & VM_FAULT_SIGSEGV) {
> >>                         do_sigsegv(regs, write, address, si_code);
> >> --
> >> 2.34.1
> >>
> >>
>
Enze Li July 28, 2023, 6:01 a.m. UTC | #6
On Tue, Jul 25 2023 at 10:34:50 PM +0800, Jackie Liu wrote:

> 在 2023/7/25 14:14, Enze Li 写道:
>> The LoongArch architecture is quite different from other architectures.
>> When the allocating of KFENCE itself is done, it is mapped to the direct
>> mapping configuration window [1] by default on LoongArch.  It means that
>> it is not possible to use the page table mapped mode which required by
>> the KFENCE system and therefore it should be remapped to the appropriate
>> region.
>>
>> This patch adds architecture specific implementation details for KFENCE.
>> In particular, this implements the required interface in <asm/kfence.h>.
>>
>> Tested this patch by running the testcases and all passed.
>>
>> [1] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#virtual-address-space-and-address-translation-mode
>>
>> Signed-off-by: Enze Li <lienze@kylinos.cn>
>> ---
>>   arch/loongarch/Kconfig               |  1 +
>>   arch/loongarch/include/asm/kfence.h  | 62 ++++++++++++++++++++++++++++
>>   arch/loongarch/include/asm/pgtable.h | 14 ++++++-
>>   arch/loongarch/mm/fault.c            | 22 ++++++----
>>   4 files changed, 90 insertions(+), 9 deletions(-)
>>   create mode 100644 arch/loongarch/include/asm/kfence.h
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 70635ea3d1e4..5b63b16be49e 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -91,6 +91,7 @@ config LOONGARCH
>>   	select HAVE_ARCH_AUDITSYSCALL
>>   	select HAVE_ARCH_JUMP_LABEL
>>   	select HAVE_ARCH_JUMP_LABEL_RELATIVE
>> +	select HAVE_ARCH_KFENCE
>>   	select HAVE_ARCH_MMAP_RND_BITS if MMU
>>   	select HAVE_ARCH_SECCOMP_FILTER
>>   	select HAVE_ARCH_TRACEHOOK
>> diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
>> new file mode 100644
>> index 000000000000..fb39076fe4d7
>> --- /dev/null
>> +++ b/arch/loongarch/include/asm/kfence.h
>> @@ -0,0 +1,62 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * KFENCE support for LoongArch.
>> + *
>> + * Author: Enze Li <lienze@kylinos.cn>
>> + * Copyright (C) 2022-2023 KylinSoft Corporation.
>> + */
>> +
>> +#ifndef _ASM_LOONGARCH_KFENCE_H
>> +#define _ASM_LOONGARCH_KFENCE_H
>> +
>> +#include <linux/kfence.h>
>> +#include <asm/pgtable.h>
>> +#include <asm/tlb.h>
>> +
>> +static inline bool arch_kfence_init_pool(void)
>> +{
>> +	char *kfence_pool = __kfence_pool;
>> +	struct vm_struct *area;
>> +	int err;
>> +
>> +	area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
>> +				    KFENCE_AREA_START, KFENCE_AREA_END,
>> +				    __builtin_return_address(0));
>> +	if (!area)
>> +		return false;
>> +
>> +	__kfence_pool = (char *)area->addr;
>
> I think there should be something wrong here.
>
>> +	err = ioremap_page_range((unsigned long)__kfence_pool,
>> +				 (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
>> +				 virt_to_phys((void *)kfence_pool),
>> +				 PAGE_KERNEL);
>> +	if (err) {
>> +		free_vm_area(area);
>
> If err > 0, return area->addr here, It's not correct.

Hi Jackie,

Good catch!  I'll fix this issue in v3.

Cheers!
Enze
diff mbox series

Patch

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 70635ea3d1e4..5b63b16be49e 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -91,6 +91,7 @@  config LOONGARCH
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select HAVE_ARCH_KFENCE
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644
index 000000000000..fb39076fe4d7
--- /dev/null
+++ b/arch/loongarch/include/asm/kfence.h
@@ -0,0 +1,62 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+	char *kfence_pool = __kfence_pool;
+	struct vm_struct *area;
+	int err;
+
+	area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+				    KFENCE_AREA_START, KFENCE_AREA_END,
+				    __builtin_return_address(0));
+	if (!area)
+		return false;
+
+	__kfence_pool = (char *)area->addr;
+	err = ioremap_page_range((unsigned long)__kfence_pool,
+				 (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+				 virt_to_phys((void *)kfence_pool),
+				 PAGE_KERNEL);
+	if (err) {
+		free_vm_area(area);
+		return false;
+	}
+
+	return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	pte_t *pte = virt_to_kpte(addr);
+
+	if (WARN_ON(!pte) || pte_none(*pte))
+		return false;
+
+	if (protect)
+		set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+	else
+		set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+	/* Flush this CPU's TLB. */
+	preempt_disable();
+	local_flush_tlb_one(addr);
+	preempt_enable();
+
+	return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 98a0c98de9d1..2702a6ba7122 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -77,6 +77,13 @@  extern unsigned long zero_page_mask;
 	(virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
 #define __HAVE_COLOR_ZERO_PAGE
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE \
+	(((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE	0
+#endif
+
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
  * Avoid the first couple of pages so NULL pointer dereferences will
@@ -88,11 +95,16 @@  extern unsigned long zero_page_mask;
 #define VMALLOC_START	MODULES_END
 #define VMALLOC_END	\
 	(vm_map_base +	\
-	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
 
 #define vmemmap		((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END	((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_START	VMEMMAP_END
+#define KFENCE_AREA_END		(KFENCE_AREA_START + KFENCE_AREA_SIZE)
+#endif
+
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index da5b6d518cdb..c0319128b221 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -23,6 +23,7 @@ 
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@ 
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs, unsigned long address,
+				 unsigned long write)
 {
 	const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@  static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
 	if (fixup_exception(regs))
 		return;
 
+	if (kfence_handle_page_fault(address, write, regs))
+		return;
+
 	/*
 	 * Oops. The kernel tried to access some bad page. We'll have to
 	 * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@  static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
 	die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address,
+				       unsigned long write)
 {
 	/*
 	 * We ran out of memory, call the OOM killer, and return the userspace
 	 * (which will retry the fault, or kill us if we got oom-killed).
 	 */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, address, write);
 		return;
 	}
 	pagefault_out_of_memory();
@@ -69,7 +75,7 @@  static void __kprobes do_sigbus(struct pt_regs *regs,
 {
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, address, write);
 		return;
 	}
 
@@ -90,7 +96,7 @@  static void __kprobes do_sigsegv(struct pt_regs *regs,
 
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs)) {
-		no_context(regs, address);
+		no_context(regs, address, write);
 		return;
 	}
 
@@ -149,7 +155,7 @@  static void __kprobes __do_page_fault(struct pt_regs *regs,
 	 */
 	if (address & __UA_LIMIT) {
 		if (!user_mode(regs))
-			no_context(regs, address);
+			no_context(regs, address, write);
 		else
 			do_sigsegv(regs, write, address, si_code);
 		return;
@@ -211,7 +217,7 @@  static void __kprobes __do_page_fault(struct pt_regs *regs,
 
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
-			no_context(regs, address);
+			no_context(regs, address, write);
 		return;
 	}
 
@@ -232,7 +238,7 @@  static void __kprobes __do_page_fault(struct pt_regs *regs,
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mmap_read_unlock(mm);
 		if (fault & VM_FAULT_OOM) {
-			do_out_of_memory(regs, address);
+			do_out_of_memory(regs, address, write);
 			return;
 		} else if (fault & VM_FAULT_SIGSEGV) {
 			do_sigsegv(regs, write, address, si_code);