[v8] mm,kfence: decouple kfence from page granularity mapping judgement

Message ID 1678777502-6933-1-git-send-email-quic_zhenhuah@quicinc.com (mailing list archive)
State New, archived

Commit Message

Zhenhua Huang March 14, 2023, 7:05 a.m. UTC
KFENCE only requires its pool to be mapped at page granularity, provided
the pool is initialized early. The previous check was overly protective.
Following Mark's suggestion in [1] to "just map the KFENCE region a page
granularity", decouple KFENCE from the can_set_direct_map() check and map
only the KFENCE pool at page granularity. Note that a late-initialized
KFENCE pool still requires the whole linear map at page granularity.

Page granularity mapping in theory costs more memory (2 MB per 1 GB) on
the arm64 platform. For example, tested on QEMU (1 GB of emulated RAM)
with gki_defconfig and rodata protection turned off:
Before:
[root@liebao ]# cat /proc/meminfo
MemTotal:         999484 kB
After:
[root@liebao ]# cat /proc/meminfo
MemTotal:        1001480 kB
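
(Editorial note: the delta is 1001480 kB - 999484 kB = 1996 kB, roughly
2 MB: mapping 1 GB of linear map with 4 KB pages needs 512 last-level
page tables of 4 KB each, which 2 MB block mappings avoid.)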

To implement this, the KFENCE pool allocation is also relocated to
before the linear mapping is set up: arm64_kfence_alloc_pool() allocates
the physical address, and __kfence_pool is set once the linear mapping
has been established.
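
In outline, the resulting boot flow is as follows (an editorial summary
of the diff below; all names are as in the patch):

  setup_arch()
    paging_init()
      map_mem()
        early_kfence_pool = arm64_kfence_alloc_pool()  <- memblock phys alloc
        memblock_mark_nomap(early_kfence_pool, KFENCE_POOL_SIZE)
        ...                                            <- linear-map loop skips the pool
        __map_memblock(..., NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS)
        memblock_clear_nomap(early_kfence_pool, KFENCE_POOL_SIZE)
        kfence_set_pool(early_kfence_pool)             <- __kfence_pool = phys_to_virt(...)
  start_kernel()
    mm_init()
      kfence_alloc_pool()                              <- returns early, pool already set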

Link: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
---
 arch/arm64/include/asm/kfence.h |  2 ++
 arch/arm64/mm/mmu.c             | 44 +++++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/pageattr.c        |  9 +++++++--
 include/linux/kfence.h          |  8 ++++++++
 mm/kfence/core.c                |  9 +++++++++
 5 files changed, 70 insertions(+), 2 deletions(-)

Comments

Zhenhua Huang March 14, 2023, 10:31 a.m. UTC | #1
On 2023/3/14 16:41, Marco Elver wrote:
> On Tue, 14 Mar 2023 at 08:05, Zhenhua Huang <quic_zhenhuah@quicinc.com> wrote:
>>
>> [...]
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> [...]
>> +#ifdef CONFIG_KFENCE
>> +
>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>> +{
>> +       phys_addr_t kfence_pool;
>> +
>> +       if (!kfence_sample_interval)
>> +               return 0;
>> +
>> +       kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>> +       if (!kfence_pool)
>> +               pr_err("failed to allocate kfence pool\n");
>> +
>> +       return kfence_pool;
>> +}
>> +
>> +#else
>> +
>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>> +{
>> +       return 0;
>> +}
>> +
>> +#endif
>> +
>> +phys_addr_t early_kfence_pool;
> 
> The compiler will not optimize out this global. This now exists in all
> arm64 kernel builds.
> 
> Furthermore, there's no need for this to be phys_addr_t. Nothing
> outside map_mem() needs the address, so this can just be a bool.

It seems we need early_kfence_pool to be an explicit phys_addr_t: we 
need the address to mark/clear NOMAP for the region, so that the for 
loop in map_mem() does not cover it with the linear mapping.

> 
> I'd recommend moving the variable under CONFIG_KFENCE, and in the asm
> header, just having a static inline helper function e.g.
> arm64_kfence_early_pool(). That helper just returns false in the
> !CONFIG_KFENCE case.
> 
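
For illustration, a minimal sketch of the helper Marco describes (an
editorial addition, not part of the posted series; the helper name comes
from the suggestion above, and the extern declaration assumes the
variable stays non-static in mmu.c so map_mem() can still use the
address):

  /* arch/arm64/include/asm/kfence.h: hypothetical shape, not the posted patch */
  #ifdef CONFIG_KFENCE
  extern phys_addr_t early_kfence_pool;

  static inline bool arm64_kfence_early_pool(void)
  {
  	return early_kfence_pool != 0;
  }
  #else
  static inline bool arm64_kfence_early_pool(void)
  {
  	return false;
  }
  #endif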
>> [...]
>> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
>> index 5349c37..0765395 100644
>> --- a/mm/kfence/core.c
>> +++ b/mm/kfence/core.c
>> @@ -814,12 +814,21 @@ void __init kfence_alloc_pool(void)
>>          if (!kfence_sample_interval)
>>                  return;
>>
>> +       /* if the pool has already been initialized by arch, skip the below */
>> +       if (__kfence_pool)
>> +               return;
>> +
>>          __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>>
>>          if (!__kfence_pool)
>>                  pr_err("failed to allocate pool\n");
>>   }
>>
>> +void __init kfence_set_pool(phys_addr_t addr)
>> +{
>> +       __kfence_pool = phys_to_virt(addr);
>> +}
> 
> Please move this function to the header as a static inline function,
> because nothing else other than arm64 needs this, having this function
> be here introduces a .text size increase for everyone.
> 
> The function is so short that having it as a static inline function is
> fine, and will save a few bytes of .text.

Reasonable! Thanks!
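
For reference, a minimal sketch of the static-inline variant in
include/linux/kfence.h (an editorial illustration; it assumes
__kfence_pool is already declared extern in that header, as
is_kfence_address() requires, and that phys_to_virt() is visible to its
users):

  /* include/linux/kfence.h: hypothetical rework, not the posted patch */
  #ifdef CONFIG_KFENCE
  extern char *__kfence_pool;

  /* Let an arch hand over its pre-allocated, page-granular pool early. */
  static inline void kfence_set_pool(phys_addr_t addr)
  {
  	__kfence_pool = phys_to_virt(addr);
  }
  #else
  static inline void kfence_set_pool(phys_addr_t addr) { }
  #endif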


Thanks,
Zhenhua
Pavan Kondeti March 14, 2023, 11:14 a.m. UTC | #2
On Tue, Mar 14, 2023 at 06:08:07PM +0800, Zhenhua Huang wrote:
> 
> 
> On 2023/3/14 16:36, Pavan Kondeti wrote:
> > On Tue, Mar 14, 2023 at 03:05:02PM +0800, Zhenhua Huang wrote:
> > > [...]
> > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > > [...]
> > > +#ifdef CONFIG_KFENCE
> > > +
> > > +static phys_addr_t arm64_kfence_alloc_pool(void)
> > > +{
> > > +	phys_addr_t kfence_pool;
> > > +
> > > +	if (!kfence_sample_interval)
> > > +		return 0;
> > > +
> > 
> > Are you sure that kernel command-line params are processed this early?
> > AFAICS, start_kernel()->parse_args() processes the kernel arguments, and
> > we are here before that. Without your patch, mm_init(), which takes care
> > of allocating the kfence memory, is called after parse_args().
> > 
> > Can you check your patch with kfence.sample_interval=0 appended to the
> > kernel command line?
> > 
> 
> Thanks Pavan. I have tried it and you're correct. Previously I thought it
> was parsed via:
> setup_arch()->parse_early_param(earlier)->parse_early_options->
> do_early_param
> Unfortunately that does not seem to take effect.
> 
> Then is the only way left to always allocate the kfence pool early, since
> we can't get sample_interval at this early stage?
> 

That would mean we allocate the kfence pool memory even when it is
disabled on the command line. That does not sound good to me.

Is it possible to free this early-allocated memory later in
mm_init()->kfence_alloc_pool()? If that is not possible, can we think of
adding an early param for kfence?

> > > +	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
> > > +	if (!kfence_pool)
> > > +		pr_err("failed to allocate kfence pool\n");
> > > +
> > For whatever reason, if this allocation fails, what should be done? We
> > end up not calling kfence_set_pool(). kfence_alloc_pool() is going to
> > attempt the allocation again, but we did not set up page granularity.
> > That means we are enabling KFENCE without meeting its preconditions.
> > Can you check this?
> 
> In this scenario, early_kfence_pool will be false (0) and we end up
> keeping page granularity mapping for the whole linear map, so it should
> be fine IMO.
> 

Right, I missed that hunk in can_set_direct_map().
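
For context, this is the hunk in question from the patch below: with
early_kfence_pool left at 0, can_set_direct_map() still returns true on
CONFIG_KFENCE builds, so the whole linear map stays page-granular and a
late pool allocation remains safe:

  	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
  	    (IS_ENABLED(CONFIG_KFENCE) && !early_kfence_pool);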

Thanks,
Pavan
Zhenhua Huang March 14, 2023, 11:20 a.m. UTC | #3
On 2023/3/14 18:08, Zhenhua Huang wrote:
> 
> 
> On 2023/3/14 16:36, Pavan Kondeti wrote:
>> On Tue, Mar 14, 2023 at 03:05:02PM +0800, Zhenhua Huang wrote:
>>> [...]
>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>> [...]
>>> +#ifdef CONFIG_KFENCE
>>> +
>>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>>> +{
>>> +    phys_addr_t kfence_pool;
>>> +
>>> +    if (!kfence_sample_interval)
>>> +        return 0;
>>> +
>>
>> Are you sure that kernel command-line params are processed this early?
>> AFAICS, start_kernel()->parse_args() processes the kernel arguments, and
>> we are here before that. Without your patch, mm_init(), which takes care
>> of allocating the kfence memory, is called after parse_args().
>> 
>> Can you check your patch with kfence.sample_interval=0 appended to the
>> kernel command line?
>>
> 
> Thanks Pavan. I have tried it and you're correct. Previously I thought it
> was parsed via:
> setup_arch()->parse_early_param(earlier)->parse_early_options->
> do_early_param
> Unfortunately that does not seem to take effect.
> 
> Then is the only way left to always allocate the kfence pool early, since
> we can't get sample_interval at this early stage?

From the logs, it seems the early param can take effect before the linear 
mapping is set up. Let me think about it :) Thanks for pointing this out!
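
(Editorial note: on arm64, setup_arch() calls parse_early_param() before
paging_init()->map_mem(), so early_param() handlers do run before the
linear map is built; it is the module_param() handling in
start_kernel()->parse_args() that comes too late, which is what Marco's
early_param() suggestion below works around.)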

> [...]
Zhenhua Huang March 15, 2023, 6:51 a.m. UTC | #4
Thanks Pavan.

On 2023/3/14 19:14, Pavan Kondeti wrote:
> On Tue, Mar 14, 2023 at 06:08:07PM +0800, Zhenhua Huang wrote:
>>
>>
>> On 2023/3/14 16:36, Pavan Kondeti wrote:
>>> On Tue, Mar 14, 2023 at 03:05:02PM +0800, Zhenhua Huang wrote:
>>>> [...]
>>>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>>> [...]
>>>> +#ifdef CONFIG_KFENCE
>>>> +
>>>> +static phys_addr_t arm64_kfence_alloc_pool(void)
>>>> +{
>>>> +	phys_addr_t kfence_pool;
>>>> +
>>>> +	if (!kfence_sample_interval)
>>>> +		return 0;
>>>> +
>>>
>>> Are you sure that kernel command-line params are processed this early?
>>> AFAICS, start_kernel()->parse_args() processes the kernel arguments, and
>>> we are here before that. Without your patch, mm_init(), which takes care
>>> of allocating the kfence memory, is called after parse_args().
>>> 
>>> Can you check your patch with kfence.sample_interval=0 appended to the
>>> kernel command line?
>>>
>>
>> Thanks Pavan. I have tried it and you're correct. Previously I thought it
>> was parsed via:
>> setup_arch()->parse_early_param(earlier)->parse_early_options->
>> do_early_param
>> Unfortunately that does not seem to take effect.
>> 
>> Then is the only way left to always allocate the kfence pool early, since
>> we can't get sample_interval at this early stage?
>>
> 
> That would mean we allocate the kfence pool memory even when it is
> disabled on the command line. That does not sound good to me.
> 
> Is it possible to free this early-allocated memory later in
> mm_init()->kfence_alloc_pool()? If that is not possible, can we think of
> adding an early param for kfence?

If we freed that buffer, there may be no chance to get a page 
granularity mapped buffer again, as all later allocations/frees go 
through the normal buddy allocator.

At this stage, it seems only an additional early param can work. Marco 
previously wanted to reuse sample_interval, but that seems not doable now.

Hi Marco,

Sorry, can we think through the solution again? For example, on ARM64:
1. intercept an early boot arg and give the early-allocated memory to KFENCE
2. KFENCE disables the dynamic switch
3. disable page granularity mapping and save the memory overhead
The purpose is that without the boot arg it is just the same as now: the 
arch-specific kfence buffer is not allocated. And with the boot arg, we 
get the expected saving.

Thanks,
Zhenhua

> [...]
Marco Elver March 15, 2023, 8:52 a.m. UTC | #5
On Wed, Mar 15, 2023 at 02:51PM +0800, Zhenhua Huang wrote:
[...]
> > Is it possible to free this early-allocated memory later in
> > mm_init()->kfence_alloc_pool()? If that is not possible, can we think of
> > adding an early param for kfence?
> 
> If we freed that buffer, there may be no chance to get a page granularity
> mapped buffer again, as all later allocations/frees go through the normal
> buddy allocator.
> 
> At this stage, it seems only an additional early param can work. Marco
> previously wanted to reuse sample_interval, but that seems not doable now.
> 
> Hi Marco,
> 
> Sorry, can we think through the solution again? For example, on ARM64:
> 1. intercept an early boot arg and give the early-allocated memory to KFENCE
> 2. KFENCE disables the dynamic switch
> 3. disable page granularity mapping and save the memory overhead
> The purpose is that without the boot arg it is just the same as now: the
> arch-specific kfence buffer is not allocated. And with the boot arg, we
> get the expected saving.

You can get kfence.sample_interval with early_param(). mm/kfence/core.c
should be left as is with a module param, so it can be set at runtime in
/sys/module/kfence/parameters/.

However, you can add this to the #ifdef CONFIG_KFENCE section in the
arm64 code you're adding:

  static bool kfence_early_init __initdata = !!CONFIG_KFENCE_SAMPLE_INTERVAL;

  static int __init parse_kfence_early_init(char *p)
  {
  	int val;

  	if (get_option(&p, &val))
  		kfence_early_init = !!val;
  	return 0;
  }
  early_param("kfence.sample_interval", parse_kfence_early_init);

Nothing is preventing us from parsing kfence.sample_interval twice
during boot. At this stage you don't need the actual sample_interval,
only whether kfence.sample_interval was provided on the cmdline and is
non-zero.

That will avoid adding another new param.
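
A sketch of how the arm64 side might then consume this flag (an
editorial illustration, assuming kfence_early_init lives next to
arm64_kfence_alloc_pool() in mmu.c; it simply replaces the
kfence_sample_interval check from the posted patch):

  static phys_addr_t __init arm64_kfence_alloc_pool(void)
  {
  	phys_addr_t kfence_pool;

  	/* Only reserve the pool if "kfence.sample_interval" was given and non-zero. */
  	if (!kfence_early_init)
  		return 0;

  	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
  	if (!kfence_pool)
  		pr_err("failed to allocate kfence pool\n");

  	return kfence_pool;
  }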

Thanks,
-- Marco
Zhenhua Huang March 15, 2023, 11:19 a.m. UTC | #6
On 2023/3/15 16:52, Marco Elver wrote:
> On Wed, Mar 15, 2023 at 02:51PM +0800, Zhenhua Huang wrote:
> [...]
> 
> You can get kfence.sample_interval with early_param(). mm/kfence/core.c
> should be left as is with a module param, so it can be set at runtime in
> /sys/module/kfence/parameters/.
> 
> However, you can add this to the #ifdef CONFIG_KFENCE section in the
> arm64 code you're adding:
> 
>    static bool kfence_early_init __initdata = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> 
>    static int __init parse_kfence_early_init(char *p)
>    {
>    	int val;
> 
>    	if (get_option(&p, &val))
>    		kfence_early_init = !!val;
>    	return 0;
>    }
>    early_param("kfence.sample_interval", parse_kfence_early_init);
> 
> Nothing is preventing us from parsing kfence.sample_interval twice
> during boot. At this stage you don't need the actual sample_interval,
> only whether kfence.sample_interval was provided on the cmdline and is
> non-zero.
> 
> That will avoid adding another new param.

I'm fine with the above solution, thanks Marco. Let me make the patch 
and share it for further review.

Thanks,
Zhenhua

> 
> Thanks,
> -- Marco

Patch

diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index aa855c6..f1f9ca2d 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -10,6 +10,8 @@ 
 
 #include <asm/set_memory.h>
 
+extern phys_addr_t early_kfence_pool;
+
 static inline bool arch_kfence_init_pool(void) { return true; }
 
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6f9d889..7fbf2ed 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -24,6 +24,7 @@ 
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/kfence.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -38,6 +39,7 @@ 
 #include <asm/ptdump.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
+#include <asm/kfence.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
@@ -525,6 +527,33 @@  static int __init enable_crash_mem_map(char *arg)
 }
 early_param("crashkernel", enable_crash_mem_map);
 
+#ifdef CONFIG_KFENCE
+
+static phys_addr_t arm64_kfence_alloc_pool(void)
+{
+	phys_addr_t kfence_pool;
+
+	if (!kfence_sample_interval)
+		return 0;
+
+	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+	if (!kfence_pool)
+		pr_err("failed to allocate kfence pool\n");
+
+	return kfence_pool;
+}
+
+#else
+
+static phys_addr_t arm64_kfence_alloc_pool(void)
+{
+	return 0;
+}
+
+#endif
+
+phys_addr_t early_kfence_pool;
+
 static void __init map_mem(pgd_t *pgdp)
 {
 	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
@@ -543,6 +572,10 @@  static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
+	early_kfence_pool = arm64_kfence_alloc_pool();
+	if (early_kfence_pool)
+		memblock_mark_nomap(early_kfence_pool, KFENCE_POOL_SIZE);
+
 	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
@@ -608,6 +641,17 @@  static void __init map_mem(pgd_t *pgdp)
 		}
 	}
 #endif
+
+	/* Kfence pool needs page-level mapping */
+	if (early_kfence_pool) {
+		__map_memblock(pgdp, early_kfence_pool,
+			early_kfence_pool + KFENCE_POOL_SIZE,
+			pgprot_tagged(PAGE_KERNEL),
+			NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+		memblock_clear_nomap(early_kfence_pool, KFENCE_POOL_SIZE);
+		/* kfence_pool really mapped now */
+		kfence_set_pool(early_kfence_pool);
+	}
 }
 
 void mark_rodata_ro(void)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 79dd201..7ce5295 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -7,10 +7,12 @@ 
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/kfence.h>
 
 #include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/tlbflush.h>
+#include <asm/kfence.h>
 
 struct page_change_data {
 	pgprot_t set_mask;
@@ -22,12 +24,15 @@  bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
 bool can_set_direct_map(void)
 {
 	/*
-	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
+	 * rodata_full and DEBUG_PAGEALLOC require linear map to be
 	 * mapped at page granularity, so that it is possible to
 	 * protect/unprotect single pages.
+	 *
+	 * Kfence pool requires page granularity mapping also if we init it
+	 * late.
 	 */
 	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
-		IS_ENABLED(CONFIG_KFENCE);
+	    (IS_ENABLED(CONFIG_KFENCE) && !early_kfence_pool);
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 726857a..570d4e3 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -64,6 +64,12 @@  static __always_inline bool is_kfence_address(const void *addr)
 void __init kfence_alloc_pool(void);
 
 /**
+ * kfence_set_pool() - allows an arch to set the
+ * KFENCE pool during early init
+ */
+void __init kfence_set_pool(phys_addr_t addr);
+
+/**
  * kfence_init() - perform KFENCE initialization at boot time
  *
  * Requires that kfence_alloc_pool() was called before. This sets up the
@@ -222,8 +228,10 @@  bool __kfence_obj_info(struct kmem_obj_info *kpp, void *object, struct slab *sla
 
 #else /* CONFIG_KFENCE */
 
+#define KFENCE_POOL_SIZE 0
 static inline bool is_kfence_address(const void *addr) { return false; }
 static inline void kfence_alloc_pool(void) { }
+static inline void kfence_set_pool(phys_addr_t addr) { }
 static inline void kfence_init(void) { }
 static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
 static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 5349c37..0765395 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -814,12 +814,21 @@  void __init kfence_alloc_pool(void)
 	if (!kfence_sample_interval)
 		return;
 
+	/* if the pool has already been initialized by arch, skip the below */
+	if (__kfence_pool)
+		return;
+
 	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
 
 	if (!__kfence_pool)
 		pr_err("failed to allocate pool\n");
 }
 
+void __init kfence_set_pool(phys_addr_t addr)
+{
+	__kfence_pool = phys_to_virt(addr);
+}
+
 static void kfence_init_enable(void)
 {
 	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))