diff mbox series

[v5] mm,kfence: decouple kfence from page granularity mapping judgement

Message ID 1678683825-11866-1-git-send-email-quic_zhenhuah@quicinc.com (mailing list archive)
State New, archived
Headers show
Series [v5] mm,kfence: decouple kfence from page granularity mapping judgement | expand

Commit Message

Zhenhua Huang March 13, 2023, 5:03 a.m. UTC
KFENCE only needs its own pool to be mapped at page granularity; the
previous check in can_set_direct_map() was overly conservative. In [1],
Mark suggested to "just map the KFENCE region a page granularity". So
decouple KFENCE from that check and use page-granularity mapping for the
KFENCE pool only.

Page-granularity mapping in theory costs more memory (about 2M per 1GB)
on the arm64 platform. For example, this is what I measured on QEMU
(emulated 1GB RAM) with gki_defconfig and rodata protection turned off:
Before:
[root@liebao ]# cat /proc/meminfo
MemTotal:         999484 kB
After:
[root@liebao ]# cat /proc/meminfo
MemTotal:        1001480 kB

To implement this, the KFENCE pool allocation is also moved to before the
linear mapping is set up: arm64_kfence_alloc_pool() allocates the pool's
physical address, and __kfence_pool is set once the linear mapping has
been set up.

LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
---
 arch/arm64/mm/mmu.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/pageattr.c |  5 ++---
 include/linux/kfence.h   |  8 ++++++++
 mm/kfence/core.c         |  9 +++++++++
 4 files changed, 61 insertions(+), 3 deletions(-)

Comments

Marco Elver March 13, 2023, 7:50 a.m. UTC | #1
On Mon, 13 Mar 2023 at 06:04, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>
> Kfence only needs its pool to be mapped as page granularity, previous
> judgement was a bit over protected. From [1], Mark suggested to "just
> map the KFENCE region a page granularity". So I decouple it from judgement
> and do page granularity mapping for kfence pool only.
>
> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
> gki_defconfig, also turning off rodata protection:
> Before:
> [root@liebao ]# cat /proc/meminfo
> MemTotal:         999484 kB
> After:
> [root@liebao ]# cat /proc/meminfo
> MemTotal:        1001480 kB
>
> To implement this, also relocate the kfence pool allocation before the
> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
> addr, __kfence_pool is to be set after linear mapping set up.

This patch still breaks the late-init capabilities that Kefeng pointed out.

I think the only viable option is:

 1. If KFENCE early init is requested on arm64, do what you're doing here.

 2. If KFENCE is compiled in, but not enabled, do what was done
before, so it can be enabled late.

Am I missing an option?

>
> LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
> Suggested-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
> ---
>  arch/arm64/mm/mmu.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
>  arch/arm64/mm/pageattr.c |  5 ++---
>  include/linux/kfence.h   |  8 ++++++++
>  mm/kfence/core.c         |  9 +++++++++
>  4 files changed, 61 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 6f9d889..ca5c932 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -24,6 +24,7 @@
>  #include <linux/mm.h>
>  #include <linux/vmalloc.h>
>  #include <linux/set_memory.h>
> +#include <linux/kfence.h>
>
>  #include <asm/barrier.h>
>  #include <asm/cputype.h>
> @@ -525,6 +526,31 @@ static int __init enable_crash_mem_map(char *arg)
>  }
>  early_param("crashkernel", enable_crash_mem_map);
>
> +#ifdef CONFIG_KFENCE
> +
> +static phys_addr_t arm64_kfence_alloc_pool(void)
> +{
> +       phys_addr_t kfence_pool;
> +
> +       if (!kfence_sample_interval)
> +               return 0;
> +
> +       kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
> +       if (!kfence_pool)
> +               pr_err("failed to allocate kfence pool\n");
> +
> +       return kfence_pool;
> +}
> +
> +#else
> +
> +static phys_addr_t arm64_kfence_alloc_pool(void)
> +{
> +       return 0;
> +}
> +
> +#endif
> +
>  static void __init map_mem(pgd_t *pgdp)
>  {
>         static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
> @@ -532,6 +558,7 @@ static void __init map_mem(pgd_t *pgdp)
>         phys_addr_t kernel_end = __pa_symbol(__init_begin);
>         phys_addr_t start, end;
>         int flags = NO_EXEC_MAPPINGS;
> +       phys_addr_t kfence_pool;
>         u64 i;
>
>         /*
> @@ -564,6 +591,10 @@ static void __init map_mem(pgd_t *pgdp)
>         }
>  #endif
>
> +       kfence_pool = arm64_kfence_alloc_pool();
> +       if (kfence_pool)
> +               memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +
>         /* map all the memory banks */
>         for_each_mem_range(i, &start, &end) {
>                 if (start >= end)
> @@ -608,6 +639,17 @@ static void __init map_mem(pgd_t *pgdp)
>                 }
>         }
>  #endif
> +
> +       /* Kfence pool needs page-level mapping */
> +       if (kfence_pool) {
> +               __map_memblock(pgdp, kfence_pool,
> +                       kfence_pool + KFENCE_POOL_SIZE,
> +                       pgprot_tagged(PAGE_KERNEL),
> +                       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
> +               memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +               /* kfence_pool really mapped now */
> +               kfence_set_pool(kfence_pool);
> +       }
>  }
>
>  void mark_rodata_ro(void)
> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> index 79dd201..61156d0 100644
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -22,12 +22,11 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
>  bool can_set_direct_map(void)
>  {
>         /*
> -        * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
> +        * rodata_full and DEBUG_PAGEALLOC require linear map to be
>          * mapped at page granularity, so that it is possible to
>          * protect/unprotect single pages.
>          */
> -       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
> -               IS_ENABLED(CONFIG_KFENCE);
> +       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled();
>  }
>
>  static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
> diff --git a/include/linux/kfence.h b/include/linux/kfence.h
> index 726857a..570d4e3 100644
> --- a/include/linux/kfence.h
> +++ b/include/linux/kfence.h
> @@ -64,6 +64,12 @@ static __always_inline bool is_kfence_address(const void *addr)
>  void __init kfence_alloc_pool(void);
>
>  /**
> + * kfence_set_pool() - allows an arch to set the
> + * KFENCE pool during early init
> + */
> +void __init kfence_set_pool(phys_addr_t addr);
> +
> +/**
>   * kfence_init() - perform KFENCE initialization at boot time
>   *
>   * Requires that kfence_alloc_pool() was called before. This sets up the
> @@ -222,8 +228,10 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
>
>  #else /* CONFIG_KFENCE */
>
> +#define KFENCE_POOL_SIZE 0
>  static inline bool is_kfence_address(const void *addr) { return false; }
>  static inline void kfence_alloc_pool(void) { }
> +static inline void kfence_set_pool(phys_addr_t addr) { }
>  static inline void kfence_init(void) { }
>  static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
>  static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
> index 5349c37..0765395 100644
> --- a/mm/kfence/core.c
> +++ b/mm/kfence/core.c
> @@ -814,12 +814,21 @@ void __init kfence_alloc_pool(void)
>         if (!kfence_sample_interval)
>                 return;
>
> +       /* if the pool has already been initialized by arch, skip the below */
> +       if (__kfence_pool)
> +               return;
> +
>         __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>
>         if (!__kfence_pool)
>                 pr_err("failed to allocate pool\n");
>  }
>
> +void __init kfence_set_pool(phys_addr_t addr)
> +{
> +       __kfence_pool = phys_to_virt(addr);
> +}
> +
>  static void kfence_init_enable(void)
>  {
>         if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
> --
> 2.7.4
>
Zhenhua Huang March 13, 2023, 9:05 a.m. UTC | #2
Thanks Marco!

On 2023/3/13 15:50, Marco Elver wrote:
> On Mon, 13 Mar 2023 at 06:04, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>>
>> Kfence only needs its pool to be mapped as page granularity, previous
>> judgement was a bit over protected. From [1], Mark suggested to "just
>> map the KFENCE region a page granularity". So I decouple it from judgement
>> and do page granularity mapping for kfence pool only.
>>
>> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
>> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
>> gki_defconfig, also turning off rodata protection:
>> Before:
>> [root@liebao ]# cat /proc/meminfo
>> MemTotal:         999484 kB
>> After:
>> [root@liebao ]# cat /proc/meminfo
>> MemTotal:        1001480 kB
>>
>> To implement this, also relocate the kfence pool allocation before the
>> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
>> addr, __kfence_pool is to be set after linear mapping set up.
> 
> This patch still breaks the late-init capabilities that Kefeng pointed out.
> 
> I think the only viable option is:
> 
>   1. If KFENCE early init is requested on arm64, do what you're doing here.
> 
>   2. If KFENCE is compiled in, but not enabled, do what was done
> before, so it can be enabled late.

I'm fine with the above solution as well. The disadvantage is that if we
want to dynamically disable kfence through kfence_sample_interval, it
must still be mapped at page granularity.

> 
> Am I missing an option?
> 

Another option is what Kefeng first suggested and what I proposed in the
comments on patchset v3; I actually wanted to do it in a separate patch:

"
So how about we raise another change, like you mentioned bootargs 
indicating to use late init of b33f778bba5e ("kfence: alloc kfence_pool 
after system startup").
1. in arm64_kfence_alloc_pool():
    if (!kfence_sample_interval && !using_late_init)
              return 0;
    else
              allocate pool
2. also do the check in late allocation,like
    if (do_allocation_late && !using_late_init)
              BUG();
"
The idea is to allocate the pool early as well if we need
using_late_init.

Kefeng, Marco,

What do you think?

>>
>> LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
>> Suggested-by: Mark Rutland <mark.rutland@arm.com>
>> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
>> ---
>>   arch/arm64/mm/mmu.c      | 42 ++++++++++++++++++++++++++++++++++++++++++
>>   arch/arm64/mm/pageattr.c |  5 ++---
>>   include/linux/kfence.h   |  8 ++++++++
>>   mm/kfence/core.c         |  9 +++++++++
>>   4 files changed, 61 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 6f9d889..ca5c932 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -24,6 +24,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/vmalloc.h>
>>   #include <linux/set_memory.h>
>> +#include <linux/kfence.h>
>>
>>   #include <asm/barrier.h>
>>   #include <asm/cputype.h>
>> @@ -525,6 +526,31 @@ static int __init enable_crash_mem_map(char *arg)
>>   }
>>   early_param("crashkernel", enable_crash_mem_map);
>>
>> +#ifdef CONFIG_KFENCE
>> +
>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>> +{
>> +       phys_addr_t kfence_pool;
>> +
>> +       if (!kfence_sample_interval)
>> +               return 0;
>> +
>> +       kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>> +       if (!kfence_pool)
>> +               pr_err("failed to allocate kfence pool\n");
>> +
>> +       return kfence_pool;
>> +}
>> +
>> +#else
>> +
>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>> +{
>> +       return 0;
>> +}
>> +
>> +#endif
>> +
>>   static void __init map_mem(pgd_t *pgdp)
>>   {
>>          static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
>> @@ -532,6 +558,7 @@ static void __init map_mem(pgd_t *pgdp)
>>          phys_addr_t kernel_end = __pa_symbol(__init_begin);
>>          phys_addr_t start, end;
>>          int flags = NO_EXEC_MAPPINGS;
>> +       phys_addr_t kfence_pool;
>>          u64 i;
>>
>>          /*
>> @@ -564,6 +591,10 @@ static void __init map_mem(pgd_t *pgdp)
>>          }
>>   #endif
>>
>> +       kfence_pool = arm64_kfence_alloc_pool();
>> +       if (kfence_pool)
>> +               memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
>> +
>>          /* map all the memory banks */
>>          for_each_mem_range(i, &start, &end) {
>>                  if (start >= end)
>> @@ -608,6 +639,17 @@ static void __init map_mem(pgd_t *pgdp)
>>                  }
>>          }
>>   #endif
>> +
>> +       /* Kfence pool needs page-level mapping */
>> +       if (kfence_pool) {
>> +               __map_memblock(pgdp, kfence_pool,
>> +                       kfence_pool + KFENCE_POOL_SIZE,
>> +                       pgprot_tagged(PAGE_KERNEL),
>> +                       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
>> +               memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
>> +               /* kfence_pool really mapped now */
>> +               kfence_set_pool(kfence_pool);
>> +       }
>>   }
>>
>>   void mark_rodata_ro(void)
>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>> index 79dd201..61156d0 100644
>> --- a/arch/arm64/mm/pageattr.c
>> +++ b/arch/arm64/mm/pageattr.c
>> @@ -22,12 +22,11 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
>>   bool can_set_direct_map(void)
>>   {
>>          /*
>> -        * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
>> +        * rodata_full and DEBUG_PAGEALLOC require linear map to be
>>           * mapped at page granularity, so that it is possible to
>>           * protect/unprotect single pages.
>>           */
>> -       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
>> -               IS_ENABLED(CONFIG_KFENCE);
>> +       return (rodata_enabled && rodata_full) || debug_pagealloc_enabled();
>>   }
>>
>>   static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
>> diff --git a/include/linux/kfence.h b/include/linux/kfence.h
>> index 726857a..570d4e3 100644
>> --- a/include/linux/kfence.h
>> +++ b/include/linux/kfence.h
>> @@ -64,6 +64,12 @@ static __always_inline bool is_kfence_address(const void *addr)
>>   void __init kfence_alloc_pool(void);
>>
>>   /**
>> + * kfence_set_pool() - allows an arch to set the
>> + * KFENCE pool during early init
>> + */
>> +void __init kfence_set_pool(phys_addr_t addr);
>> +
>> +/**
>>    * kfence_init() - perform KFENCE initialization at boot time
>>    *
>>    * Requires that kfence_alloc_pool() was called before. This sets up the
>> @@ -222,8 +228,10 @@ bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
>>
>>   #else /* CONFIG_KFENCE */
>>
>> +#define KFENCE_POOL_SIZE 0
>>   static inline bool is_kfence_address(const void *addr) { return false; }
>>   static inline void kfence_alloc_pool(void) { }
>> +static inline void kfence_set_pool(phys_addr_t addr) { }
>>   static inline void kfence_init(void) { }
>>   static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
>>   static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
>> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
>> index 5349c37..0765395 100644
>> --- a/mm/kfence/core.c
>> +++ b/mm/kfence/core.c
>> @@ -814,12 +814,21 @@ void __init kfence_alloc_pool(void)
>>          if (!kfence_sample_interval)
>>                  return;
>>
>> +       /* if the pool has already been initialized by arch, skip the below */
>> +       if (__kfence_pool)
>> +               return;
>> +
>>          __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>>
>>          if (!__kfence_pool)
>>                  pr_err("failed to allocate pool\n");
>>   }
>>
>> +void __init kfence_set_pool(phys_addr_t addr)
>> +{
>> +       __kfence_pool = phys_to_virt(addr);
>> +}
>> +
>>   static void kfence_init_enable(void)
>>   {
>>          if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
>> --
>> 2.7.4
>>
>
Marco Elver March 13, 2023, 9:49 a.m. UTC | #3
On Mon, 13 Mar 2023 at 10:05, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>
> Thanks Marco!
>
> On 2023/3/13 15:50, Marco Elver wrote:
> > On Mon, 13 Mar 2023 at 06:04, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
> >>
> >> Kfence only needs its pool to be mapped as page granularity, previous
> >> judgement was a bit over protected. From [1], Mark suggested to "just
> >> map the KFENCE region a page granularity". So I decouple it from judgement
> >> and do page granularity mapping for kfence pool only.
> >>
> >> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
> >> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
> >> gki_defconfig, also turning off rodata protection:
> >> Before:
> >> [root@liebao ]# cat /proc/meminfo
> >> MemTotal:         999484 kB
> >> After:
> >> [root@liebao ]# cat /proc/meminfo
> >> MemTotal:        1001480 kB
> >>
> >> To implement this, also relocate the kfence pool allocation before the
> >> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
> >> addr, __kfence_pool is to be set after linear mapping set up.
> >
> > This patch still breaks the late-init capabilities that Kefeng pointed out.
> >
> > I think the only viable option is:
> >
> >   1. If KFENCE early init is requested on arm64, do what you're doing here.
> >
> >   2. If KFENCE is compiled in, but not enabled, do what was done
> > before, so it can be enabled late.
>
> I'm fine with above solution as well. The Disadvantage is if we want to
> dynamically disable kfence through kfence_sample_interval, it must be
> mapped into page granularity still.
>
> >
> > Am I missing an option?
> >
>
> Another option is what Kefeng firstly thought and I had proposed on
> comments of patchsetV3, actually I wanted to do in an separate patch:

Please do it in the same patch (or patch series), otherwise we end up
with a regression.

> "
> So how about we raise another change, like you mentioned bootargs
> indicating to use late init of b33f778bba5e ("kfence: alloc kfence_pool

Please avoid introducing another bootarg just for this. It will
confuse users and will cause serious annoyance (bad UX).

> after system startup").
> 1. in arm64_kfence_alloc_pool():
>     if (!kfence_sample_interval && !using_late_init)
>               return 0;
>     else
>               allocate pool

The whole point of late allocation was that the entire pool is _not_
allocated until it's needed (during late init). So for space-conscious
users, this option is actually worse.

> 2. also do the check in late allocation,like
>     if (do_allocation_late && !using_late_init)
>               BUG();

BUG() needs to be avoided. Just because a user used the system wrong,
should not cause it to crash (WARN instead)... but I'd really prefer
you avoid introducing another boot arg, because it'll lead to bad UX.

> "
> The thought is to allocate pool early as well if we need to
> using_late_init.
>
> Kefeng, Marco,
>
> How's your idea?

I recommend that you just make can_set_direct_map() conditional on
KFENCE being initialized early or not. With rodata protection most
arm64 kernels likely pay the page granular direct map cost anyway. And
for your special usecase where you want to optimize memory use, but
know that KFENCE is enabled, it'll result in the savings you desire.

Thanks,
-- Marco
Zhenhua Huang March 13, 2023, 10:27 a.m. UTC | #4
Thanks Marco!

On 2023/3/13 17:49, Marco Elver wrote:
> On Mon, 13 Mar 2023 at 10:05, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>>
>> Thanks Marco!
>>
>> On 2023/3/13 15:50, Marco Elver wrote:
>>> On Mon, 13 Mar 2023 at 06:04, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>>>>
>>>> Kfence only needs its pool to be mapped as page granularity, previous
>>>> judgement was a bit over protected. From [1], Mark suggested to "just
>>>> map the KFENCE region a page granularity". So I decouple it from judgement
>>>> and do page granularity mapping for kfence pool only.
>>>>
>>>> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
>>>> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
>>>> gki_defconfig, also turning off rodata protection:
>>>> Before:
>>>> [root@liebao ]# cat /proc/meminfo
>>>> MemTotal:         999484 kB
>>>> After:
>>>> [root@liebao ]# cat /proc/meminfo
>>>> MemTotal:        1001480 kB
>>>>
>>>> To implement this, also relocate the kfence pool allocation before the
>>>> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
>>>> addr, __kfence_pool is to be set after linear mapping set up.
>>>
>>> This patch still breaks the late-init capabilities that Kefeng pointed out.
>>>
>>> I think the only viable option is:
>>>
>>>    1. If KFENCE early init is requested on arm64, do what you're doing here.
>>>
>>>    2. If KFENCE is compiled in, but not enabled, do what was done
>>> before, so it can be enabled late.
>>
>> I'm fine with above solution as well. The Disadvantage is if we want to
>> dynamically disable kfence through kfence_sample_interval, it must be
>> mapped into page granularity still.
>>
>>>
>>> Am I missing an option?
>>>
>>
>> Another option is what Kefeng firstly thought and I had proposed on
>> comments of patchsetV3, actually I wanted to do in an separate patch:
> 
> Please do it in the same patch (or patch series), otherwise we end up
> with a regression.

OK.

> 
>> "
>> So how about we raise another change, like you mentioned bootargs
>> indicating to use late init of b33f778bba5e ("kfence: alloc kfence_pool
> 
> Please avoid introducing another bootarg just for this. It will
> confuse users and will cause serious annoyance (bad UX).

OK, got it.

> 
>> after system startup").
>> 1. in arm64_kfence_alloc_pool():
>>      if (!kfence_sample_interval && !using_late_init)
>>                return 0;
>>      else
>>                allocate pool
> 
> The whole point of late allocation was that the entire pool is _not_
> allocated until it's needed (during late init). So for space-conscious
> users, this option is actually worse.
> 
>> 2. also do the check in late allocation,like
>>      if (do_allocation_late && !using_late_init)
>>                BUG();
> 
> BUG() needs to be avoided. Just because a user used the system wrong,
> should not cause it to crash (WARN instead)... but I'd really prefer
> you avoid introducing another boot arg, because it'll lead to bad UX.
> 
>> "
>> The thought is to allocate pool early as well if we need to
>> using_late_init.
>>
>> Kefeng, Marco,
>>
>> How's your idea?
> 
> I recommend that you just make can_set_direct_map() conditional on
> KFENCE being initialized early or not. With rodata protection most
> arm64 kernels likely pay the page granular direct map cost anyway. And
> for your special usecase where you want to optimize memory use, but
> know that KFENCE is enabled, it'll result in the savings you desire.

Thanks Marco, I get your idea. Yeah, rodata is another over-protection
case. I will make the change following your suggestion and send it for
your review.

> 
> Thanks,
> -- Marco
diff mbox series

Patch

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6f9d889..ca5c932 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -24,6 +24,7 @@ 
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/kfence.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -525,6 +526,31 @@  static int __init enable_crash_mem_map(char *arg)
 }
 early_param("crashkernel", enable_crash_mem_map);
 
+#ifdef CONFIG_KFENCE
+
+static phys_addr_t arm64_kfence_alloc_pool(void)
+{
+	phys_addr_t kfence_pool;
+
+	if (!kfence_sample_interval)
+		return 0;
+
+	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+	if (!kfence_pool)
+		pr_err("failed to allocate kfence pool\n");
+
+	return kfence_pool;
+}
+
+#else
+
+static phys_addr_t arm64_kfence_alloc_pool(void)
+{
+	return 0;
+}
+
+#endif
+
 static void __init map_mem(pgd_t *pgdp)
 {
 	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
@@ -532,6 +558,7 @@  static void __init map_mem(pgd_t *pgdp)
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	phys_addr_t start, end;
 	int flags = NO_EXEC_MAPPINGS;
+	phys_addr_t kfence_pool;
 	u64 i;
 
 	/*
@@ -564,6 +591,10 @@  static void __init map_mem(pgd_t *pgdp)
 	}
 #endif
 
+	kfence_pool = arm64_kfence_alloc_pool();
+	if (kfence_pool)
+		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+
 	/* map all the memory banks */
 	for_each_mem_range(i, &start, &end) {
 		if (start >= end)
@@ -608,6 +639,17 @@  static void __init map_mem(pgd_t *pgdp)
 		}
 	}
 #endif
+
+	/* Kfence pool needs page-level mapping */
+	if (kfence_pool) {
+		__map_memblock(pgdp, kfence_pool,
+			kfence_pool + KFENCE_POOL_SIZE,
+			pgprot_tagged(PAGE_KERNEL),
+			NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+		/* kfence_pool really mapped now */
+		kfence_set_pool(kfence_pool);
+	}
 }
 
 void mark_rodata_ro(void)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 79dd201..61156d0 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -22,12 +22,11 @@  bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
 bool can_set_direct_map(void)
 {
 	/*
-	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
+	 * rodata_full and DEBUG_PAGEALLOC require linear map to be
 	 * mapped at page granularity, so that it is possible to
 	 * protect/unprotect single pages.
 	 */
-	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
-		IS_ENABLED(CONFIG_KFENCE);
+	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled();
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 726857a..570d4e3 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -64,6 +64,12 @@  static __always_inline bool is_kfence_address(const void *addr)
 void __init kfence_alloc_pool(void);
 
 /**
+ * kfence_set_pool() - allows an arch to set the
+ * KFENCE pool during early init
+ */
+void __init kfence_set_pool(phys_addr_t addr);
+
+/**
  * kfence_init() - perform KFENCE initialization at boot time
  *
  * Requires that kfence_alloc_pool() was called before. This sets up the
@@ -222,8 +228,10 @@  bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
 
 #else /* CONFIG_KFENCE */
 
+#define KFENCE_POOL_SIZE 0
 static inline bool is_kfence_address(const void *addr) { return false; }
 static inline void kfence_alloc_pool(void) { }
+static inline void kfence_set_pool(phys_addr_t addr) { }
 static inline void kfence_init(void) { }
 static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
 static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 5349c37..0765395 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -814,12 +814,21 @@  void __init kfence_alloc_pool(void)
 	if (!kfence_sample_interval)
 		return;
 
+	/* if the pool has already been initialized by arch, skip the below */
+	if (__kfence_pool)
+		return;
+
 	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
 
 	if (!__kfence_pool)
 		pr_err("failed to allocate pool\n");
 }
 
+void __init kfence_set_pool(phys_addr_t addr)
+{
+	__kfence_pool = phys_to_virt(addr);
+}
+
 static void kfence_init_enable(void)
 {
 	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))