diff mbox series

[4/5] RISC-V: Protect .init.text & .init.data

Message ID 20201009211344.2358688-5-atish.patra@wdc.com (mailing list archive)
State New, archived
Headers show
Series Improve kernel section protections | expand

Commit Message

Atish Patra Oct. 9, 2020, 9:13 p.m. UTC
Currently, .init.text & .init.data are intermixed which makes it impossible
apply different permissions to them. .init.data shouldn't need exec
permissions while .init.text shouldn't have write permission.

Keep them in separate sections so that different permissions are applied to
each section. This improves the kernel protection under
CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
entire _init section after it is freed so that those pages can be used for
other purpose.

Signed-off-by: Atish Patra <atish.patra@wdc.com>
---
 arch/riscv/include/asm/sections.h   |  2 ++
 arch/riscv/include/asm/set_memory.h |  2 ++
 arch/riscv/kernel/setup.c           |  4 ++++
 arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
 arch/riscv/mm/init.c                |  6 ++++++
 arch/riscv/mm/pageattr.c            |  6 ++++++
 6 files changed, 29 insertions(+), 1 deletion(-)

Comments

Greentime Hu Oct. 12, 2020, 1:14 p.m. UTC | #1
Atish Patra <atish.patra@wdc.com> 於 2020年10月10日 週六 上午5:13寫道:
>
> Currently, .init.text & .init.data are intermixed which makes it impossible
> apply different permissions to them. .init.data shouldn't need exec
> permissions while .init.text shouldn't have write permission.
>
> Keep them in separate sections so that different permissions are applied to
> each section. This improves the kernel protection under
> CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> entire _init section after it is freed so that those pages can be used for
> other purpose.
>
> Signed-off-by: Atish Patra <atish.patra@wdc.com>
> ---
>  arch/riscv/include/asm/sections.h   |  2 ++
>  arch/riscv/include/asm/set_memory.h |  2 ++
>  arch/riscv/kernel/setup.c           |  4 ++++
>  arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
>  arch/riscv/mm/init.c                |  6 ++++++
>  arch/riscv/mm/pageattr.c            |  6 ++++++
>  6 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> index d60802bfafbc..730d2c4a844d 100644
> --- a/arch/riscv/include/asm/sections.h
> +++ b/arch/riscv/include/asm/sections.h
> @@ -10,6 +10,8 @@
>  #include <asm-generic/sections.h>
>  extern char _start[];
>  extern char _start_kernel[];
> +extern char __init_data_begin[], __init_data_end[];
> +extern char __init_text_begin[], __init_text_end[];
>
>  extern bool init_mem_is_free;
>
> diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> index 4cc3a4e2afd3..913429c9c1ae 100644
> --- a/arch/riscv/include/asm/set_memory.h
> +++ b/arch/riscv/include/asm/set_memory.h
> @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
>  int set_memory_rw(unsigned long addr, int numpages);
>  int set_memory_x(unsigned long addr, int numpages);
>  int set_memory_nx(unsigned long addr, int numpages);
> +int set_memory_default(unsigned long addr, int numpages);
>  void protect_kernel_text_data(void);
>  #else
>  static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
>  static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
>  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
>  static inline void protect_kernel_text_data(void) {};
> +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
>  #endif
>
>  int set_direct_map_invalid_noflush(struct page *page);
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 4176a2affd1d..b8a35ef0eab0 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
>
>  void free_initmem(void)
>  {
> +       unsigned long init_begin = (unsigned long)__init_begin;
> +       unsigned long init_end = (unsigned long)__init_end;
> +
> +       set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
>         free_initmem_default(POISON_FREE_INITMEM);
>         init_mem_is_free = true;
>  }
> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> index 0807633f0dc8..15b9882588ae 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -30,8 +30,8 @@ SECTIONS
>         . = ALIGN(PAGE_SIZE);
>
>         __init_begin = .;
> +       __init_text_begin = .;
>         INIT_TEXT_SECTION(PAGE_SIZE)
> -       INIT_DATA_SECTION(16)
>         . = ALIGN(8);
>         __soc_early_init_table : {
>                 __soc_early_init_table_start = .;
> @@ -48,11 +48,19 @@ SECTIONS
>         {
>                 EXIT_TEXT
>         }
> +
> +       __init_text_end = .;
> +       . = ALIGN(SECTION_ALIGN);
> +       /* Start of init data section */
> +       __init_data_begin = .;
> +       INIT_DATA_SECTION(16)
>         .exit.data :
>         {
>                 EXIT_DATA
>         }
>         PERCPU_SECTION(L1_CACHE_BYTES)
> +
> +       __init_data_end = .;
>         __init_end = .;
>
>         . = ALIGN(SECTION_ALIGN);
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 7859a1d1b34d..3ef0eafcc7c7 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
>  {
>         unsigned long text_start = (unsigned long)_text;
>         unsigned long text_end = (unsigned long)_etext;
> +       unsigned long init_text_start = (unsigned long)__init_text_begin;
> +       unsigned long init_text_end = (unsigned long)__init_text_end;
> +       unsigned long init_data_start = (unsigned long)__init_data_begin;
> +       unsigned long init_data_end = (unsigned long)__init_data_end;
>         unsigned long rodata_start = (unsigned long)__start_rodata;
>         unsigned long data_start = (unsigned long)_data;
>         unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
>
> +       set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
>         set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> +       set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
>         set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
>         set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
>  }
> diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> index 19fecb362d81..aecedaf086ab 100644
> --- a/arch/riscv/mm/pageattr.c
> +++ b/arch/riscv/mm/pageattr.c
> @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
>         return ret;
>  }
>
> +int set_memory_default(unsigned long addr, int numpages)
> +{
> +       return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> +                           __pgprot(0));
> +}
> +
>  int set_memory_ro(unsigned long addr, int numpages)
>  {
>         return __set_memory(addr, numpages, __pgprot(_PAGE_READ),


Hi Atish,

I tested this patchset and CONFIG_DEBUG_WX=y

[    2.664012] Freeing unused kernel memory: 114420K
[    2.666081] ------------[ cut here ]------------
[    2.666779] riscv/mm: Found insecure W+X mapping at address
(____ptrval____)/0xffffffe000000000
[    2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
note_page+0xc2/0x238
[    2.669147] Modules linked in:
[    2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.9.0-rc8-00033-gfacf070a80ea #153
[    2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
[    2.671054]  gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
ffffffe007e861d0
[    2.671683]  t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
ffffffe0fba9bc60
[    2.672499]  s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
0000000000000020
[    2.673119]  a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
d4328dc070ccb100
[    2.673729]  a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
ffffffe007851e98
[    2.674333]  s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
0000000087200000
[    2.674963]  s5 : 00000000000000cb s6 : 0000000000000003 s7 :
ffffffe007e7ac00
[    2.675564]  s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
ffffffe000000000
[    2.676392]  s11: ffffffe040000000 t3 : 000000000001da48 t4 :
000000000001da48
[    2.676992]  t5 : ffffffe007e74490 t6 : ffffffe007e81432
[    2.677449] status: 0000000000000120 badaddr: 0000000000000000
cause: 0000000000000003
[    2.678128] ---[ end trace 5f0a86dbe986db9b ]---
[    2.678952] Checked W+X mappings: failed, 28672 W+X pages found
[    2.679737] Run /init as init process

After debugging, I found it is because
set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
address if it is a 2MB mapping address.
For example, if we gave 0xffffffe000001000 and this address is in a
PMD mapping which is 2MB mapping, it will set the page attributes from
0xffffffe000000000.
And this caused the CPU hotplug failed because it is in head.text section.

#0  pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
addr=18446743936270602240, next=18446743936270766080,
    walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
70              pmd_t val = READ_ONCE(*pmd);
=> 0xffffffe007005e96 <pageattr_pmd_entry+0>:   41 11   addi    sp,sp,-16
   0xffffffe007005e98 <pageattr_pmd_entry+2>:   22 e4   sd      s0,8(sp)
   0xffffffe007005e9a <pageattr_pmd_entry+4>:   00 08   addi    s0,sp,16
   0xffffffe007005e9c <pageattr_pmd_entry+6>:   1c 61   ld      a5,0(a0)
(gdb) p/x addr
$21 = 0xffffffe000001000

# cat /sys/kernel/debug/kernel_page_tables
---[ Fixmap start ]---
0xffffffcefef00000-0xffffffceff000000    0x0000000082200000         1M
PTE     D A . . . W R V
---[ Fixmap end ]---
---[ PCI I/O start ]---
---[ PCI I/O end ]---
---[ vmalloc() area ]---
0xffffffd000090000-0xffffffd000093000    0x00000001741bb000        12K
PTE     D A . . . W R V
0xffffffdffffbb000-0xffffffdffffbe000    0x0000000175f97000        12K
PTE     D A . . . W R V
0xffffffdffffcc000-0xffffffdffffcf000    0x0000000175f9a000        12K
PTE     D A . . . W R V
0xffffffdffffdd000-0xffffffdffffe0000    0x0000000175f9d000        12K
PTE     D A . . . W R V
0xffffffdffffee000-0xffffffdfffff1000    0x0000000175fa0000        12K
PTE     D A . . . W R V
---[ vmalloc() end ]---
---[ Linear mapping ]---
0xffffffe000000000-0xffffffe007000000    0x0000000080200000       112M
PMD     D A . . X W R V
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
0xffffffe007000000-0xffffffe007800000    0x0000000087200000         8M
PMD     D A . . X . R V
0xffffffe007800000-0xffffffe007e00000    0x0000000087a00000         6M
PMD     D A . . . . R V
0xffffffe007e00000-0xffffffe0ffe00000    0x0000000088000000      3968M
PMD     D A . . . W R V
#

Any idea?
Atish Patra Oct. 12, 2020, 11:26 p.m. UTC | #2
On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> Atish Patra <atish.patra@wdc.com> 於 2020年10月10日 週六 上午5:13寫道:
> >
> > Currently, .init.text & .init.data are intermixed which makes it impossible
> > apply different permissions to them. .init.data shouldn't need exec
> > permissions while .init.text shouldn't have write permission.
> >
> > Keep them in separate sections so that different permissions are applied to
> > each section. This improves the kernel protection under
> > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > entire _init section after it is freed so that those pages can be used for
> > other purpose.
> >
> > Signed-off-by: Atish Patra <atish.patra@wdc.com>
> > ---
> >  arch/riscv/include/asm/sections.h   |  2 ++
> >  arch/riscv/include/asm/set_memory.h |  2 ++
> >  arch/riscv/kernel/setup.c           |  4 ++++
> >  arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
> >  arch/riscv/mm/init.c                |  6 ++++++
> >  arch/riscv/mm/pageattr.c            |  6 ++++++
> >  6 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > index d60802bfafbc..730d2c4a844d 100644
> > --- a/arch/riscv/include/asm/sections.h
> > +++ b/arch/riscv/include/asm/sections.h
> > @@ -10,6 +10,8 @@
> >  #include <asm-generic/sections.h>
> >  extern char _start[];
> >  extern char _start_kernel[];
> > +extern char __init_data_begin[], __init_data_end[];
> > +extern char __init_text_begin[], __init_text_end[];
> >
> >  extern bool init_mem_is_free;
> >
> > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > index 4cc3a4e2afd3..913429c9c1ae 100644
> > --- a/arch/riscv/include/asm/set_memory.h
> > +++ b/arch/riscv/include/asm/set_memory.h
> > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> >  int set_memory_rw(unsigned long addr, int numpages);
> >  int set_memory_x(unsigned long addr, int numpages);
> >  int set_memory_nx(unsigned long addr, int numpages);
> > +int set_memory_default(unsigned long addr, int numpages);
> >  void protect_kernel_text_data(void);
> >  #else
> >  static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> >  static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> >  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> >  static inline void protect_kernel_text_data(void) {};
> > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> >  #endif
> >
> >  int set_direct_map_invalid_noflush(struct page *page);
> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > index 4176a2affd1d..b8a35ef0eab0 100644
> > --- a/arch/riscv/kernel/setup.c
> > +++ b/arch/riscv/kernel/setup.c
> > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> >
> >  void free_initmem(void)
> >  {
> > +       unsigned long init_begin = (unsigned long)__init_begin;
> > +       unsigned long init_end = (unsigned long)__init_end;
> > +
> > +       set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> >         free_initmem_default(POISON_FREE_INITMEM);
> >         init_mem_is_free = true;
> >  }
> > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > index 0807633f0dc8..15b9882588ae 100644
> > --- a/arch/riscv/kernel/vmlinux.lds.S
> > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > @@ -30,8 +30,8 @@ SECTIONS
> >         . = ALIGN(PAGE_SIZE);
> >
> >         __init_begin = .;
> > +       __init_text_begin = .;
> >         INIT_TEXT_SECTION(PAGE_SIZE)
> > -       INIT_DATA_SECTION(16)
> >         . = ALIGN(8);
> >         __soc_early_init_table : {
> >                 __soc_early_init_table_start = .;
> > @@ -48,11 +48,19 @@ SECTIONS
> >         {
> >                 EXIT_TEXT
> >         }
> > +
> > +       __init_text_end = .;
> > +       . = ALIGN(SECTION_ALIGN);
> > +       /* Start of init data section */
> > +       __init_data_begin = .;
> > +       INIT_DATA_SECTION(16)
> >         .exit.data :
> >         {
> >                 EXIT_DATA
> >         }
> >         PERCPU_SECTION(L1_CACHE_BYTES)
> > +
> > +       __init_data_end = .;
> >         __init_end = .;
> >
> >         . = ALIGN(SECTION_ALIGN);
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> >  {
> >         unsigned long text_start = (unsigned long)_text;
> >         unsigned long text_end = (unsigned long)_etext;
> > +       unsigned long init_text_start = (unsigned long)__init_text_begin;
> > +       unsigned long init_text_end = (unsigned long)__init_text_end;
> > +       unsigned long init_data_start = (unsigned long)__init_data_begin;
> > +       unsigned long init_data_end = (unsigned long)__init_data_end;
> >         unsigned long rodata_start = (unsigned long)__start_rodata;
> >         unsigned long data_start = (unsigned long)_data;
> >         unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> >
> > +       set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> >         set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > +       set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> >         set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> >         set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> >  }
> > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > index 19fecb362d81..aecedaf086ab 100644
> > --- a/arch/riscv/mm/pageattr.c
> > +++ b/arch/riscv/mm/pageattr.c
> > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> >         return ret;
> >  }
> >
> > +int set_memory_default(unsigned long addr, int numpages)
> > +{
> > +       return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > +                           __pgprot(0));
> > +}
> > +
> >  int set_memory_ro(unsigned long addr, int numpages)
> >  {
> >         return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
>
>
> Hi Atish,
>
> I tested this patchset and CONFIG_DEBUG_WX=y
>

Thanks for testing the patch.

> [    2.664012] Freeing unused kernel memory: 114420K
> [    2.666081] ------------[ cut here ]------------
> [    2.666779] riscv/mm: Found insecure W+X mapping at address
> (____ptrval____)/0xffffffe000000000
> [    2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> note_page+0xc2/0x238
> [    2.669147] Modules linked in:
> [    2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> 5.9.0-rc8-00033-gfacf070a80ea #153
> [    2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> [    2.671054]  gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> ffffffe007e861d0
> [    2.671683]  t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> ffffffe0fba9bc60
> [    2.672499]  s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> 0000000000000020
> [    2.673119]  a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> d4328dc070ccb100
> [    2.673729]  a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> ffffffe007851e98
> [    2.674333]  s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> 0000000087200000
> [    2.674963]  s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> ffffffe007e7ac00
> [    2.675564]  s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> ffffffe000000000
> [    2.676392]  s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> 000000000001da48
> [    2.676992]  t5 : ffffffe007e74490 t6 : ffffffe007e81432
> [    2.677449] status: 0000000000000120 badaddr: 0000000000000000
> cause: 0000000000000003
> [    2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> [    2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> [    2.679737] Run /init as init process
>

This would be triggered for the current kernel as well as we don't
protect the permission for __init section at all. As you correctly
pointed out, __init & .head sections are mapped with same permissions
because of 2MB mappings.

Currently, the entire head.text & init section have RWX permission.
This patch is trying remove the write permission from .init.text &
.head.text and execute permission from .init.data until kernel is
booted.
It doesn't provide full protection but better than the current scheme.

To remove the write permission of .head.txt only, we have to keep
.head.txt & .init.text section in separate sections. The linear
mapping would look like this in that case.
There are no issues as such but kernel size would increase by another 2M.

---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000200000    0x0000000080200000         2M
PMD     D A . . X . R V
0xffffffe000200000-0xffffffe000600000    0x0000000080400000         4M
PMD     D A . . . W R V
0xffffffe000600000-0xffffffe000e00000    0x0000000080800000         8M
PMD     D A . . X . R V
0xffffffe000e00000-0xffffffe001400000    0x0000000081000000         6M
PMD     D A . . . . R V
0xffffffe001400000-0xffffffe03fe00000    0x0000000081600000      1002M
PMD     D A . . . W R V

The other solution would be move init section below text. Keep text &
head in one section.
The .init.text & .init.data will be in separate sections after that.
Here is the mapping in that case.

---[ Linear mapping ]---
0xffffffe000000000-0xffffffe000800000    0x0000000080200000         8M
PMD     D A . . X . R V
0xffffffe000800000-0xffffffe000c00000    0x0000000080a00000         4M
PMD     D A . . . W R V
0xffffffe000c00000-0xffffffe001200000    0x0000000080e00000         6M
PMD     D A . . . . R V
0xffffffe001200000-0xffffffe03fe00000    0x0000000081400000      1004M
PMD     D A . . . W R V

I prefer the 2nd approach compared to the first one as it saves memory
and we can fix lockdep issue without adding arch_is_kernel_data
to sections.h (Guo's patch).

However, the 2nd approach throws this problem if
CONFIG_HARDENED_USERCOPY is enabled.

net/ipv4/ipconfig.o: in function `ic_setup_routes':
/home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
defined in .text section in net/ipv4/fib_frontend.o
make: *** [Makefile:1162: vmlinux] Error 1

I am currently looking into this to understand why R_RISCV_JAL is
generated a generic function invocation
where auipc + jalr should have been used.

> After debugging, I found it is because
> set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
> address if it is a 2MB mapping address.
> For example, if we gave 0xffffffe000001000 and this address is in a
> PMD mapping which is 2MB mapping, it will set the page attributes from
> 0xffffffe000000000.
> And this caused the CPU hotplug failed because it is in head.text section.
>
> #0  pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
> addr=18446743936270602240, next=18446743936270766080,
>     walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
> 70              pmd_t val = READ_ONCE(*pmd);
> => 0xffffffe007005e96 <pageattr_pmd_entry+0>:   41 11   addi    sp,sp,-16
>    0xffffffe007005e98 <pageattr_pmd_entry+2>:   22 e4   sd      s0,8(sp)
>    0xffffffe007005e9a <pageattr_pmd_entry+4>:   00 08   addi    s0,sp,16
>    0xffffffe007005e9c <pageattr_pmd_entry+6>:   1c 61   ld      a5,0(a0)
> (gdb) p/x addr
> $21 = 0xffffffe000001000
>
> # cat /sys/kernel/debug/kernel_page_tables
> ---[ Fixmap start ]---
> 0xffffffcefef00000-0xffffffceff000000    0x0000000082200000         1M
> PTE     D A . . . W R V
> ---[ Fixmap end ]---
> ---[ PCI I/O start ]---
> ---[ PCI I/O end ]---
> ---[ vmalloc() area ]---
> 0xffffffd000090000-0xffffffd000093000    0x00000001741bb000        12K
> PTE     D A . . . W R V
> 0xffffffdffffbb000-0xffffffdffffbe000    0x0000000175f97000        12K
> PTE     D A . . . W R V
> 0xffffffdffffcc000-0xffffffdffffcf000    0x0000000175f9a000        12K
> PTE     D A . . . W R V
> 0xffffffdffffdd000-0xffffffdffffe0000    0x0000000175f9d000        12K
> PTE     D A . . . W R V
> 0xffffffdffffee000-0xffffffdfffff1000    0x0000000175fa0000        12K
> PTE     D A . . . W R V
> ---[ vmalloc() end ]---
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe007000000    0x0000000080200000       112M
> PMD     D A . . X W R V
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 0xffffffe007000000-0xffffffe007800000    0x0000000087200000         8M
> PMD     D A . . X . R V
> 0xffffffe007800000-0xffffffe007e00000    0x0000000087a00000         6M
> PMD     D A . . . . R V
> 0xffffffe007e00000-0xffffffe0ffe00000    0x0000000088000000      3968M
> PMD     D A . . . W R V
> #
>
> Any idea?
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv



--
Regards,
Atish
Atish Patra Oct. 13, 2020, 1:28 a.m. UTC | #3
On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <greentime.hu@sifive.com> wrote:
> >
> > Atish Patra <atish.patra@wdc.com> 於 2020年10月10日 週六 上午5:13寫道:
> > >
> > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > apply different permissions to them. .init.data shouldn't need exec
> > > permissions while .init.text shouldn't have write permission.
> > >
> > > Keep them in separate sections so that different permissions are applied to
> > > each section. This improves the kernel protection under
> > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > entire _init section after it is freed so that those pages can be used for
> > > other purpose.
> > >
> > > Signed-off-by: Atish Patra <atish.patra@wdc.com>
> > > ---
> > >  arch/riscv/include/asm/sections.h   |  2 ++
> > >  arch/riscv/include/asm/set_memory.h |  2 ++
> > >  arch/riscv/kernel/setup.c           |  4 ++++
> > >  arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
> > >  arch/riscv/mm/init.c                |  6 ++++++
> > >  arch/riscv/mm/pageattr.c            |  6 ++++++
> > >  6 files changed, 29 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > index d60802bfafbc..730d2c4a844d 100644
> > > --- a/arch/riscv/include/asm/sections.h
> > > +++ b/arch/riscv/include/asm/sections.h
> > > @@ -10,6 +10,8 @@
> > >  #include <asm-generic/sections.h>
> > >  extern char _start[];
> > >  extern char _start_kernel[];
> > > +extern char __init_data_begin[], __init_data_end[];
> > > +extern char __init_text_begin[], __init_text_end[];
> > >
> > >  extern bool init_mem_is_free;
> > >
> > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > --- a/arch/riscv/include/asm/set_memory.h
> > > +++ b/arch/riscv/include/asm/set_memory.h
> > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > >  int set_memory_rw(unsigned long addr, int numpages);
> > >  int set_memory_x(unsigned long addr, int numpages);
> > >  int set_memory_nx(unsigned long addr, int numpages);
> > > +int set_memory_default(unsigned long addr, int numpages);
> > >  void protect_kernel_text_data(void);
> > >  #else
> > >  static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > >  static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > >  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > >  static inline void protect_kernel_text_data(void) {};
> > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > >  #endif
> > >
> > >  int set_direct_map_invalid_noflush(struct page *page);
> > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > --- a/arch/riscv/kernel/setup.c
> > > +++ b/arch/riscv/kernel/setup.c
> > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > >
> > >  void free_initmem(void)
> > >  {
> > > +       unsigned long init_begin = (unsigned long)__init_begin;
> > > +       unsigned long init_end = (unsigned long)__init_end;
> > > +
> > > +       set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > >         free_initmem_default(POISON_FREE_INITMEM);
> > >         init_mem_is_free = true;
> > >  }
> > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > index 0807633f0dc8..15b9882588ae 100644
> > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > @@ -30,8 +30,8 @@ SECTIONS
> > >         . = ALIGN(PAGE_SIZE);
> > >
> > >         __init_begin = .;
> > > +       __init_text_begin = .;
> > >         INIT_TEXT_SECTION(PAGE_SIZE)
> > > -       INIT_DATA_SECTION(16)
> > >         . = ALIGN(8);
> > >         __soc_early_init_table : {
> > >                 __soc_early_init_table_start = .;
> > > @@ -48,11 +48,19 @@ SECTIONS
> > >         {
> > >                 EXIT_TEXT
> > >         }
> > > +
> > > +       __init_text_end = .;
> > > +       . = ALIGN(SECTION_ALIGN);
> > > +       /* Start of init data section */
> > > +       __init_data_begin = .;
> > > +       INIT_DATA_SECTION(16)
> > >         .exit.data :
> > >         {
> > >                 EXIT_DATA
> > >         }
> > >         PERCPU_SECTION(L1_CACHE_BYTES)
> > > +
> > > +       __init_data_end = .;
> > >         __init_end = .;
> > >
> > >         . = ALIGN(SECTION_ALIGN);
> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > --- a/arch/riscv/mm/init.c
> > > +++ b/arch/riscv/mm/init.c
> > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > >  {
> > >         unsigned long text_start = (unsigned long)_text;
> > >         unsigned long text_end = (unsigned long)_etext;
> > > +       unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > +       unsigned long init_text_end = (unsigned long)__init_text_end;
> > > +       unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > +       unsigned long init_data_end = (unsigned long)__init_data_end;
> > >         unsigned long rodata_start = (unsigned long)__start_rodata;
> > >         unsigned long data_start = (unsigned long)_data;
> > >         unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > >
> > > +       set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > >         set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > +       set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > >         set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > >         set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > >  }
> > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > index 19fecb362d81..aecedaf086ab 100644
> > > --- a/arch/riscv/mm/pageattr.c
> > > +++ b/arch/riscv/mm/pageattr.c
> > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > >         return ret;
> > >  }
> > >
> > > +int set_memory_default(unsigned long addr, int numpages)
> > > +{
> > > +       return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > +                           __pgprot(0));
> > > +}
> > > +
> > >  int set_memory_ro(unsigned long addr, int numpages)
> > >  {
> > >         return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> >
> >
> > Hi Atish,
> >
> > I tested this patchset and CONFIG_DEBUG_WX=y
> >
>
> Thanks for testing the patch.
>
> > [    2.664012] Freeing unused kernel memory: 114420K
> > [    2.666081] ------------[ cut here ]------------
> > [    2.666779] riscv/mm: Found insecure W+X mapping at address
> > (____ptrval____)/0xffffffe000000000
> > [    2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > note_page+0xc2/0x238
> > [    2.669147] Modules linked in:
> > [    2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > 5.9.0-rc8-00033-gfacf070a80ea #153
> > [    2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > [    2.671054]  gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > ffffffe007e861d0
> > [    2.671683]  t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > ffffffe0fba9bc60
> > [    2.672499]  s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > 0000000000000020
> > [    2.673119]  a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > d4328dc070ccb100
> > [    2.673729]  a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > ffffffe007851e98
> > [    2.674333]  s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > 0000000087200000
> > [    2.674963]  s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > ffffffe007e7ac00
> > [    2.675564]  s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > ffffffe000000000
> > [    2.676392]  s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > 000000000001da48
> > [    2.676992]  t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > [    2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > cause: 0000000000000003
> > [    2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > [    2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > [    2.679737] Run /init as init process
> >
>
> This would be triggered for the current kernel as well as we don't
> protect the permission for __init section at all. As you correctly
> pointed out, __init & .head sections are mapped with same permissions
> because of 2MB mappings.
>
> Currently, the entire head.text & init section have RWX permission.
> This patch is trying remove the write permission from .init.text &
> .head.text and execute permission from .init.data until kernel is
> booted.
> It doesn't provide full protection but better than the current scheme.
>
> To remove the write permission of .head.txt only, we have to keep
> .head.txt & .init.text section in separate sections. The linear
> mapping would look like this in that case.
> There are no issues as such but kernel size would increase by another 2M.
>
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe000200000    0x0000000080200000         2M
> PMD     D A . . X . R V
> 0xffffffe000200000-0xffffffe000600000    0x0000000080400000         4M
> PMD     D A . . . W R V
> 0xffffffe000600000-0xffffffe000e00000    0x0000000080800000         8M
> PMD     D A . . X . R V
> 0xffffffe000e00000-0xffffffe001400000    0x0000000081000000         6M
> PMD     D A . . . . R V
> 0xffffffe001400000-0xffffffe03fe00000    0x0000000081600000      1002M
> PMD     D A . . . W R V
>
> The other solution would be move init section below text. Keep text &
> head in one section.
> The .init.text & .init.data will be in separate sections after that.
> Here is the mapping in that case.
>
> ---[ Linear mapping ]---
> 0xffffffe000000000-0xffffffe000800000    0x0000000080200000         8M
> PMD     D A . . X . R V
> 0xffffffe000800000-0xffffffe000c00000    0x0000000080a00000         4M
> PMD     D A . . . W R V
> 0xffffffe000c00000-0xffffffe001200000    0x0000000080e00000         6M
> PMD     D A . . . . R V
> 0xffffffe001200000-0xffffffe03fe00000    0x0000000081400000      1004M
> PMD     D A . . . W R V
>
> I prefer the 2nd approach compared to the first one as it saves memory
> and we can fix lockdep issue without adding arch_is_kernel_data
> to sections.h (Guo's patch).
>
> However, the 2nd approach throws this problem if
> CONFIG_HARDENED_USERCOPY is enabled.
>
> net/ipv4/ipconfig.o: in function `ic_setup_routes':
> /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> defined in .text section in net/ipv4/fib_frontend.o
> make: *** [Makefile:1162: vmlinux] Error 1
>
> I am currently looking into this to understand why R_RISCV_JAL is
> generated a generic function invocation
> where auipc + jalr should have been used.
>

I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
   ip_rt_ioctl
The assembly from objdump also shows a pair of auipc + jalr.

1d0:       00000097                auipc   ra,0x0
1d4:       000080e7                jalr    ra # 1d0 <.LBE348+0x4>

I don't understand why the toolchain is complaining about relocation
error only when
CONFIG_HARDENED_USERCOPY is enabled.

Are we seeing some subtle toolchain bug ?

> > After debugging, I found it is because
> > set_memory_ro()/set_memory_nx()/... can't handle the 4KB-aligned
> > address if it is a 2MB mapping address.
> > For example, if we gave 0xffffffe000001000 and this address is in a
> > PMD mapping which is 2MB mapping, it will set the page attributes from
> > 0xffffffe000000000.
> > And this caused the CPU hotplug failed because it is in head.text section.
> >
> > #0  pageattr_pmd_entry (pmd=0xffffffe0ffdff000,
> > addr=18446743936270602240, next=18446743936270766080,
> >     walk=0xffffffe007e03e98) at arch/riscv/mm/pageattr.c:70
> > 70              pmd_t val = READ_ONCE(*pmd);
> > => 0xffffffe007005e96 <pageattr_pmd_entry+0>:   41 11   addi    sp,sp,-16
> >    0xffffffe007005e98 <pageattr_pmd_entry+2>:   22 e4   sd      s0,8(sp)
> >    0xffffffe007005e9a <pageattr_pmd_entry+4>:   00 08   addi    s0,sp,16
> >    0xffffffe007005e9c <pageattr_pmd_entry+6>:   1c 61   ld      a5,0(a0)
> > (gdb) p/x addr
> > $21 = 0xffffffe000001000
> >
> > # cat /sys/kernel/debug/kernel_page_tables
> > ---[ Fixmap start ]---
> > 0xffffffcefef00000-0xffffffceff000000    0x0000000082200000         1M
> > PTE     D A . . . W R V
> > ---[ Fixmap end ]---
> > ---[ PCI I/O start ]---
> > ---[ PCI I/O end ]---
> > ---[ vmalloc() area ]---
> > 0xffffffd000090000-0xffffffd000093000    0x00000001741bb000        12K
> > PTE     D A . . . W R V
> > 0xffffffdffffbb000-0xffffffdffffbe000    0x0000000175f97000        12K
> > PTE     D A . . . W R V
> > 0xffffffdffffcc000-0xffffffdffffcf000    0x0000000175f9a000        12K
> > PTE     D A . . . W R V
> > 0xffffffdffffdd000-0xffffffdffffe0000    0x0000000175f9d000        12K
> > PTE     D A . . . W R V
> > 0xffffffdffffee000-0xffffffdfffff1000    0x0000000175fa0000        12K
> > PTE     D A . . . W R V
> > ---[ vmalloc() end ]---
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe007000000    0x0000000080200000       112M
> > PMD     D A . . X W R V
> > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> > 0xffffffe007000000-0xffffffe007800000    0x0000000087200000         8M
> > PMD     D A . . X . R V
> > 0xffffffe007800000-0xffffffe007e00000    0x0000000087a00000         6M
> > PMD     D A . . . . R V
> > 0xffffffe007e00000-0xffffffe0ffe00000    0x0000000088000000      3968M
> > PMD     D A . . . W R V
> > #
> >
> > Any idea?
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
>
>
> --
> Regards,
> Atish
Greentime Hu Oct. 13, 2020, 3:08 a.m. UTC | #4
Atish Patra <atishp@atishpatra.org> 於 2020年10月13日 週二 上午9:28寫道:
>
> On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <atishp@atishpatra.org> wrote:
> >
> > On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <greentime.hu@sifive.com> wrote:
> > >
> > > Atish Patra <atish.patra@wdc.com> 於 2020年10月10日 週六 上午5:13寫道:
> > > >
> > > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > > apply different permissions to them. .init.data shouldn't need exec
> > > > permissions while .init.text shouldn't have write permission.
> > > >
> > > > Keep them in separate sections so that different permissions are applied to
> > > > each section. This improves the kernel protection under
> > > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > > entire _init section after it is freed so that those pages can be used for
> > > > other purpose.
> > > >
> > > > Signed-off-by: Atish Patra <atish.patra@wdc.com>
> > > > ---
> > > >  arch/riscv/include/asm/sections.h   |  2 ++
> > > >  arch/riscv/include/asm/set_memory.h |  2 ++
> > > >  arch/riscv/kernel/setup.c           |  4 ++++
> > > >  arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
> > > >  arch/riscv/mm/init.c                |  6 ++++++
> > > >  arch/riscv/mm/pageattr.c            |  6 ++++++
> > > >  6 files changed, 29 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > > index d60802bfafbc..730d2c4a844d 100644
> > > > --- a/arch/riscv/include/asm/sections.h
> > > > +++ b/arch/riscv/include/asm/sections.h
> > > > @@ -10,6 +10,8 @@
> > > >  #include <asm-generic/sections.h>
> > > >  extern char _start[];
> > > >  extern char _start_kernel[];
> > > > +extern char __init_data_begin[], __init_data_end[];
> > > > +extern char __init_text_begin[], __init_text_end[];
> > > >
> > > >  extern bool init_mem_is_free;
> > > >
> > > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > > --- a/arch/riscv/include/asm/set_memory.h
> > > > +++ b/arch/riscv/include/asm/set_memory.h
> > > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > > >  int set_memory_rw(unsigned long addr, int numpages);
> > > >  int set_memory_x(unsigned long addr, int numpages);
> > > >  int set_memory_nx(unsigned long addr, int numpages);
> > > > +int set_memory_default(unsigned long addr, int numpages);
> > > >  void protect_kernel_text_data(void);
> > > >  #else
> > > >  static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > > >  static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > > >  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > > >  static inline void protect_kernel_text_data(void) {};
> > > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > > >  #endif
> > > >
> > > >  int set_direct_map_invalid_noflush(struct page *page);
> > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > > --- a/arch/riscv/kernel/setup.c
> > > > +++ b/arch/riscv/kernel/setup.c
> > > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > > >
> > > >  void free_initmem(void)
> > > >  {
> > > > +       unsigned long init_begin = (unsigned long)__init_begin;
> > > > +       unsigned long init_end = (unsigned long)__init_end;
> > > > +
> > > > +       set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > > >         free_initmem_default(POISON_FREE_INITMEM);
> > > >         init_mem_is_free = true;
> > > >  }
> > > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > > index 0807633f0dc8..15b9882588ae 100644
> > > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > > @@ -30,8 +30,8 @@ SECTIONS
> > > >         . = ALIGN(PAGE_SIZE);
> > > >
> > > >         __init_begin = .;
> > > > +       __init_text_begin = .;
> > > >         INIT_TEXT_SECTION(PAGE_SIZE)
> > > > -       INIT_DATA_SECTION(16)
> > > >         . = ALIGN(8);
> > > >         __soc_early_init_table : {
> > > >                 __soc_early_init_table_start = .;
> > > > @@ -48,11 +48,19 @@ SECTIONS
> > > >         {
> > > >                 EXIT_TEXT
> > > >         }
> > > > +
> > > > +       __init_text_end = .;
> > > > +       . = ALIGN(SECTION_ALIGN);
> > > > +       /* Start of init data section */
> > > > +       __init_data_begin = .;
> > > > +       INIT_DATA_SECTION(16)
> > > >         .exit.data :
> > > >         {
> > > >                 EXIT_DATA
> > > >         }
> > > >         PERCPU_SECTION(L1_CACHE_BYTES)
> > > > +
> > > > +       __init_data_end = .;
> > > >         __init_end = .;
> > > >
> > > >         . = ALIGN(SECTION_ALIGN);
> > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > > --- a/arch/riscv/mm/init.c
> > > > +++ b/arch/riscv/mm/init.c
> > > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > > >  {
> > > >         unsigned long text_start = (unsigned long)_text;
> > > >         unsigned long text_end = (unsigned long)_etext;
> > > > +       unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > > +       unsigned long init_text_end = (unsigned long)__init_text_end;
> > > > +       unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > > +       unsigned long init_data_end = (unsigned long)__init_data_end;
> > > >         unsigned long rodata_start = (unsigned long)__start_rodata;
> > > >         unsigned long data_start = (unsigned long)_data;
> > > >         unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > > >
> > > > +       set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > > >         set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > > +       set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > > >         set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > > >         set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > > >  }
> > > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > > index 19fecb362d81..aecedaf086ab 100644
> > > > --- a/arch/riscv/mm/pageattr.c
> > > > +++ b/arch/riscv/mm/pageattr.c
> > > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > > >         return ret;
> > > >  }
> > > >
> > > > +int set_memory_default(unsigned long addr, int numpages)
> > > > +{
> > > > +       return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > > +                           __pgprot(0));
> > > > +}
> > > > +
> > > >  int set_memory_ro(unsigned long addr, int numpages)
> > > >  {
> > > >         return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> > >
> > >
> > > Hi Atish,
> > >
> > > I tested this patchset and CONFIG_DEBUG_WX=y
> > >
> >
> > Thanks for testing the patch.
> >
> > > [    2.664012] Freeing unused kernel memory: 114420K
> > > [    2.666081] ------------[ cut here ]------------
> > > [    2.666779] riscv/mm: Found insecure W+X mapping at address
> > > (____ptrval____)/0xffffffe000000000
> > > [    2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > > note_page+0xc2/0x238
> > > [    2.669147] Modules linked in:
> > > [    2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > 5.9.0-rc8-00033-gfacf070a80ea #153
> > > [    2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > > [    2.671054]  gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > > ffffffe007e861d0
> > > [    2.671683]  t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > > ffffffe0fba9bc60
> > > [    2.672499]  s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > > 0000000000000020
> > > [    2.673119]  a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > > d4328dc070ccb100
> > > [    2.673729]  a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > > ffffffe007851e98
> > > [    2.674333]  s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > > 0000000087200000
> > > [    2.674963]  s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > > ffffffe007e7ac00
> > > [    2.675564]  s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > > ffffffe000000000
> > > [    2.676392]  s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > > 000000000001da48
> > > [    2.676992]  t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > > [    2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > > cause: 0000000000000003
> > > [    2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > > [    2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > > [    2.679737] Run /init as init process
> > >
> >
> > This would be triggered for the current kernel as well as we don't
> > protect the permission for __init section at all. As you correctly
> > pointed out, __init & .head sections are mapped with same permissions
> > because of 2MB mappings.
> >
> > Currently, the entire head.text & init section have RWX permission.
> > This patch is trying remove the write permission from .init.text &
> > .head.text and execute permission from .init.data until kernel is
> > booted.
> > It doesn't provide full protection but better than the current scheme.
> >
> > To remove the write permission of .head.txt only, we have to keep
> > .head.txt & .init.text section in separate sections. The linear
> > mapping would look like this in that case.
> > There are no issues as such but kernel size would increase by another 2M.
> >
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe000200000    0x0000000080200000         2M
> > PMD     D A . . X . R V
> > 0xffffffe000200000-0xffffffe000600000    0x0000000080400000         4M
> > PMD     D A . . . W R V
> > 0xffffffe000600000-0xffffffe000e00000    0x0000000080800000         8M
> > PMD     D A . . X . R V
> > 0xffffffe000e00000-0xffffffe001400000    0x0000000081000000         6M
> > PMD     D A . . . . R V
> > 0xffffffe001400000-0xffffffe03fe00000    0x0000000081600000      1002M
> > PMD     D A . . . W R V
> >
> > The other solution would be move init section below text. Keep text &
> > head in one section.
> > The .init.text & .init.data will be in separate sections after that.
> > Here is the mapping in that case.
> >
> > ---[ Linear mapping ]---
> > 0xffffffe000000000-0xffffffe000800000    0x0000000080200000         8M
> > PMD     D A . . X . R V
> > 0xffffffe000800000-0xffffffe000c00000    0x0000000080a00000         4M
> > PMD     D A . . . W R V
> > 0xffffffe000c00000-0xffffffe001200000    0x0000000080e00000         6M
> > PMD     D A . . . . R V
> > 0xffffffe001200000-0xffffffe03fe00000    0x0000000081400000      1004M
> > PMD     D A . . . W R V
> >
> > I prefer the 2nd approach compared to the first one as it saves memory
> > and we can fix lockdep issue without adding arch_is_kernel_data
> > to sections.h (Guo's patch).
> >
> > However, the 2nd approach throws this problem if
> > CONFIG_HARDENED_USERCOPY is enabled.
> >
> > net/ipv4/ipconfig.o: in function `ic_setup_routes':
> > /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> > relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> > defined in .text section in net/ipv4/fib_frontend.o
> > make: *** [Makefile:1162: vmlinux] Error 1
> >
> > I am currently looking into this to understand why R_RISCV_JAL is
> > generated a generic function invocation
> > where auipc + jalr should have been used.
> >
>
> I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
>    ip_rt_ioctl
> The assembly from objdump also shows a pair of auipc + jalr.
>
> 1d0:       00000097                auipc   ra,0x0
> 1d4:       000080e7                jalr    ra # 1d0 <.LBE348+0x4>
>
> I don't understand why the toolchain is complaining about relocation
> error only when
> CONFIG_HARDENED_USERCOPY is enabled.
>
> Are we seeing some subtle toolchain bug ?
>
"relocation truncated to fit" usually means that the jump address is
too far to jump.
Maybe trying "-mno-relax -Wl,--no-relax" to disable linker relax or
checking if somewhere just using jal instead of call?
Atish Patra Oct. 13, 2020, 10:25 p.m. UTC | #5
On Mon, Oct 12, 2020 at 8:08 PM Greentime Hu <greentime.hu@sifive.com> wrote:
>
> Atish Patra <atishp@atishpatra.org> 於 2020年10月13日 週二 上午9:28寫道:
> >
> > On Mon, Oct 12, 2020 at 4:26 PM Atish Patra <atishp@atishpatra.org> wrote:
> > >
> > > On Mon, Oct 12, 2020 at 6:15 AM Greentime Hu <greentime.hu@sifive.com> wrote:
> > > >
> > > > Atish Patra <atish.patra@wdc.com> 於 2020年10月10日 週六 上午5:13寫道:
> > > > >
> > > > > Currently, .init.text & .init.data are intermixed which makes it impossible
> > > > > apply different permissions to them. .init.data shouldn't need exec
> > > > > permissions while .init.text shouldn't have write permission.
> > > > >
> > > > > Keep them in separate sections so that different permissions are applied to
> > > > > each section. This improves the kernel protection under
> > > > > CONFIG_STRICT_KERNEL_RWX. We also need to restore the permissions for the
> > > > > entire _init section after it is freed so that those pages can be used for
> > > > > other purpose.
> > > > >
> > > > > Signed-off-by: Atish Patra <atish.patra@wdc.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/sections.h   |  2 ++
> > > > >  arch/riscv/include/asm/set_memory.h |  2 ++
> > > > >  arch/riscv/kernel/setup.c           |  4 ++++
> > > > >  arch/riscv/kernel/vmlinux.lds.S     | 10 +++++++++-
> > > > >  arch/riscv/mm/init.c                |  6 ++++++
> > > > >  arch/riscv/mm/pageattr.c            |  6 ++++++
> > > > >  6 files changed, 29 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
> > > > > index d60802bfafbc..730d2c4a844d 100644
> > > > > --- a/arch/riscv/include/asm/sections.h
> > > > > +++ b/arch/riscv/include/asm/sections.h
> > > > > @@ -10,6 +10,8 @@
> > > > >  #include <asm-generic/sections.h>
> > > > >  extern char _start[];
> > > > >  extern char _start_kernel[];
> > > > > +extern char __init_data_begin[], __init_data_end[];
> > > > > +extern char __init_text_begin[], __init_text_end[];
> > > > >
> > > > >  extern bool init_mem_is_free;
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
> > > > > index 4cc3a4e2afd3..913429c9c1ae 100644
> > > > > --- a/arch/riscv/include/asm/set_memory.h
> > > > > +++ b/arch/riscv/include/asm/set_memory.h
> > > > > @@ -15,6 +15,7 @@ int set_memory_ro(unsigned long addr, int numpages);
> > > > >  int set_memory_rw(unsigned long addr, int numpages);
> > > > >  int set_memory_x(unsigned long addr, int numpages);
> > > > >  int set_memory_nx(unsigned long addr, int numpages);
> > > > > +int set_memory_default(unsigned long addr, int numpages);
> > > > >  void protect_kernel_text_data(void);
> > > > >  #else
> > > > >  static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
> > > > > @@ -22,6 +23,7 @@ static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
> > > > >  static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
> > > > >  static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
> > > > >  static inline void protect_kernel_text_data(void) {};
> > > > > +static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
> > > > >  #endif
> > > > >
> > > > >  int set_direct_map_invalid_noflush(struct page *page);
> > > > > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> > > > > index 4176a2affd1d..b8a35ef0eab0 100644
> > > > > --- a/arch/riscv/kernel/setup.c
> > > > > +++ b/arch/riscv/kernel/setup.c
> > > > > @@ -129,6 +129,10 @@ bool init_mem_is_free = false;
> > > > >
> > > > >  void free_initmem(void)
> > > > >  {
> > > > > +       unsigned long init_begin = (unsigned long)__init_begin;
> > > > > +       unsigned long init_end = (unsigned long)__init_end;
> > > > > +
> > > > > +       set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
> > > > >         free_initmem_default(POISON_FREE_INITMEM);
> > > > >         init_mem_is_free = true;
> > > > >  }
> > > > > diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> > > > > index 0807633f0dc8..15b9882588ae 100644
> > > > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > > > @@ -30,8 +30,8 @@ SECTIONS
> > > > >         . = ALIGN(PAGE_SIZE);
> > > > >
> > > > >         __init_begin = .;
> > > > > +       __init_text_begin = .;
> > > > >         INIT_TEXT_SECTION(PAGE_SIZE)
> > > > > -       INIT_DATA_SECTION(16)
> > > > >         . = ALIGN(8);
> > > > >         __soc_early_init_table : {
> > > > >                 __soc_early_init_table_start = .;
> > > > > @@ -48,11 +48,19 @@ SECTIONS
> > > > >         {
> > > > >                 EXIT_TEXT
> > > > >         }
> > > > > +
> > > > > +       __init_text_end = .;
> > > > > +       . = ALIGN(SECTION_ALIGN);
> > > > > +       /* Start of init data section */
> > > > > +       __init_data_begin = .;
> > > > > +       INIT_DATA_SECTION(16)
> > > > >         .exit.data :
> > > > >         {
> > > > >                 EXIT_DATA
> > > > >         }
> > > > >         PERCPU_SECTION(L1_CACHE_BYTES)
> > > > > +
> > > > > +       __init_data_end = .;
> > > > >         __init_end = .;
> > > > >
> > > > >         . = ALIGN(SECTION_ALIGN);
> > > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > > index 7859a1d1b34d..3ef0eafcc7c7 100644
> > > > > --- a/arch/riscv/mm/init.c
> > > > > +++ b/arch/riscv/mm/init.c
> > > > > @@ -627,11 +627,17 @@ void protect_kernel_text_data(void)
> > > > >  {
> > > > >         unsigned long text_start = (unsigned long)_text;
> > > > >         unsigned long text_end = (unsigned long)_etext;
> > > > > +       unsigned long init_text_start = (unsigned long)__init_text_begin;
> > > > > +       unsigned long init_text_end = (unsigned long)__init_text_end;
> > > > > +       unsigned long init_data_start = (unsigned long)__init_data_begin;
> > > > > +       unsigned long init_data_end = (unsigned long)__init_data_end;
> > > > >         unsigned long rodata_start = (unsigned long)__start_rodata;
> > > > >         unsigned long data_start = (unsigned long)_data;
> > > > >         unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
> > > > >
> > > > > +       set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
> > > > >         set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
> > > > > +       set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
> > > > >         set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
> > > > >         set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
> > > > >  }
> > > > > diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
> > > > > index 19fecb362d81..aecedaf086ab 100644
> > > > > --- a/arch/riscv/mm/pageattr.c
> > > > > +++ b/arch/riscv/mm/pageattr.c
> > > > > @@ -128,6 +128,12 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
> > > > >         return ret;
> > > > >  }
> > > > >
> > > > > +int set_memory_default(unsigned long addr, int numpages)
> > > > > +{
> > > > > +       return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
> > > > > +                           __pgprot(0));
> > > > > +}
> > > > > +
> > > > >  int set_memory_ro(unsigned long addr, int numpages)
> > > > >  {
> > > > >         return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
> > > >
> > > >
> > > > Hi Atish,
> > > >
> > > > I tested this patchset and CONFIG_DEBUG_WX=y
> > > >
> > >
> > > Thanks for testing the patch.
> > >
> > > > [    2.664012] Freeing unused kernel memory: 114420K
> > > > [    2.666081] ------------[ cut here ]------------
> > > > [    2.666779] riscv/mm: Found insecure W+X mapping at address
> > > > (____ptrval____)/0xffffffe000000000
> > > > [    2.668004] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:200
> > > > note_page+0xc2/0x238
> > > > [    2.669147] Modules linked in:
> > > > [    2.669735] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
> > > > 5.9.0-rc8-00033-gfacf070a80ea #153
> > > > [    2.670466] epc: ffffffe00700697c ra : ffffffe00700697c sp : ffffffe0fba9bc10
> > > > [    2.671054]  gp : ffffffe007e73078 tp : ffffffe0fba90000 t0 :
> > > > ffffffe007e861d0
> > > > [    2.671683]  t1 : 0000000000000064 t2 : ffffffe007801664 s0 :
> > > > ffffffe0fba9bc60
> > > > [    2.672499]  s1 : ffffffe0fba9bde8 a0 : 0000000000000053 a1 :
> > > > 0000000000000020
> > > > [    2.673119]  a2 : 0000000000000000 a3 : 00000000000000f4 a4 :
> > > > d4328dc070ccb100
> > > > [    2.673729]  a5 : d4328dc070ccb100 a6 : 0000000000000000 a7 :
> > > > ffffffe007851e98
> > > > [    2.674333]  s2 : ffffffe007000000 s3 : ffffffe0fba9bde8 s4 :
> > > > 0000000087200000
> > > > [    2.674963]  s5 : 00000000000000cb s6 : 0000000000000003 s7 :
> > > > ffffffe007e7ac00
> > > > [    2.675564]  s8 : ffffffe000000000 s9 : ffffffe007000000 s10:
> > > > ffffffe000000000
> > > > [    2.676392]  s11: ffffffe040000000 t3 : 000000000001da48 t4 :
> > > > 000000000001da48
> > > > [    2.676992]  t5 : ffffffe007e74490 t6 : ffffffe007e81432
> > > > [    2.677449] status: 0000000000000120 badaddr: 0000000000000000
> > > > cause: 0000000000000003
> > > > [    2.678128] ---[ end trace 5f0a86dbe986db9b ]---
> > > > [    2.678952] Checked W+X mappings: failed, 28672 W+X pages found
> > > > [    2.679737] Run /init as init process
> > > >
> > >
> > > This would be triggered for the current kernel as well as we don't
> > > protect the permission for __init section at all. As you correctly
> > > pointed out, __init & .head sections are mapped with same permissions
> > > because of 2MB mappings.
> > >
> > > Currently, the entire head.text & init section have RWX permission.
> > > This patch is trying remove the write permission from .init.text &
> > > .head.text and execute permission from .init.data until kernel is
> > > booted.
> > > It doesn't provide full protection but better than the current scheme.
> > >
> > > To remove the write permission of .head.txt only, we have to keep
> > > .head.txt & .init.text section in separate sections. The linear
> > > mapping would look like this in that case.
> > > There are no issues as such but kernel size would increase by another 2M.
> > >
> > > ---[ Linear mapping ]---
> > > 0xffffffe000000000-0xffffffe000200000    0x0000000080200000         2M
> > > PMD     D A . . X . R V
> > > 0xffffffe000200000-0xffffffe000600000    0x0000000080400000         4M
> > > PMD     D A . . . W R V
> > > 0xffffffe000600000-0xffffffe000e00000    0x0000000080800000         8M
> > > PMD     D A . . X . R V
> > > 0xffffffe000e00000-0xffffffe001400000    0x0000000081000000         6M
> > > PMD     D A . . . . R V
> > > 0xffffffe001400000-0xffffffe03fe00000    0x0000000081600000      1002M
> > > PMD     D A . . . W R V
> > >
> > > The other solution would be move init section below text. Keep text &
> > > head in one section.
> > > The .init.text & .init.data will be in separate sections after that.
> > > Here is the mapping in that case.
> > >
> > > ---[ Linear mapping ]---
> > > 0xffffffe000000000-0xffffffe000800000    0x0000000080200000         8M
> > > PMD     D A . . X . R V
> > > 0xffffffe000800000-0xffffffe000c00000    0x0000000080a00000         4M
> > > PMD     D A . . . W R V
> > > 0xffffffe000c00000-0xffffffe001200000    0x0000000080e00000         6M
> > > PMD     D A . . . . R V
> > > 0xffffffe001200000-0xffffffe03fe00000    0x0000000081400000      1004M
> > > PMD     D A . . . W R V
> > >
> > > I prefer the 2nd approach compared to the first one as it saves memory
> > > and we can fix lockdep issue without adding arch_is_kernel_data
> > > to sections.h (Guo's patch).
> > >
> > > However, the 2nd approach throws this problem if
> > > CONFIG_HARDENED_USERCOPY is enabled.
> > >
> > > net/ipv4/ipconfig.o: in function `ic_setup_routes':
> > > /home/atish/workspace/linux/net/ipv4/ipconfig.c:400:(.init.text+0x1c4):
> > > relocation truncated to fit: R_RISCV_JAL against symbol `ip_rt_ioctl'
> > > defined in .text section in net/ipv4/fib_frontend.o
> > > make: *** [Makefile:1162: vmlinux] Error 1
> > >
> > > I am currently looking into this to understand why R_RISCV_JAL is
> > > generated a generic function invocation
> > > where auipc + jalr should have been used.
> > >
> >
> > I checked the relocation is is correct 00000000000001d0 R_RISCV_CALL
> >    ip_rt_ioctl
> > The assembly from objdump also shows a pair of auipc + jalr.
> >
> > 1d0:       00000097                auipc   ra,0x0
> > 1d4:       000080e7                jalr    ra # 1d0 <.LBE348+0x4>
> >
> > I don't understand why the toolchain is complaining about relocation
> > error only when
> > CONFIG_HARDENED_USERCOPY is enabled.
> >
> > Are we seeing some subtle toolchain bug ?
> >
> "relocation truncated to fit" usually means that the jump address is
> too far to jump.
> Maybe trying "-mno-relax -Wl,--no-relax" to disable linker relax or
> checking if somewhere just using jal instead of call?

Yes. As I said the relocation id seems correct from the objdump.
I narrowed down the issue to a __builtin_constant_p

Here is the code path:
ip_rt_ioctl->rtentry_to_fib_config->copy_from_user->check_copy_size->check_object_size

#ifdef CONFIG_HARDENED_USERCOPY
112 extern void __check_object_size(const void *ptr, unsigned long n,
113                                         bool to_user);
114
115 static __always_inline void check_object_size(const void *ptr,
unsigned long n,
116                                               bool to_user)
117 {
118         if (!__builtin_constant_p(n))
---------------------> Commenting this line avoids the linker issue

                                                               119
            __check_object_size(ptr, n, to_user);
120 }
121 #else
122 static inline void check_object_size(const void *ptr, unsigned
long n,
123                                      bool to_user)
124 { }
125 #endif /* CONFIG_HARDENED_USERCOPY */

This happens only when copy_from_user is called from function that is
annotated with __init.
Adding Kito & Jim for their input

@kito, @Jim: Please let me know if I should create a issue in
riscv-gnu-toolchain repo or somewhere else.
Jim Wilson Oct. 14, 2020, 1:20 a.m. UTC | #6
On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> This happens only when copy_from_user is called from function that is
> annotated with __init.
> Adding Kito & Jim for their input
>
> @kito, @Jim: Please let me know if I should create a issue in
> riscv-gnu-toolchain repo or somewhere else.

I can't do anything useful without a testcase that I can use to
reproduce the problem.  The interactions here are complex, so pointing
at lines of code or kernel config options doesn't give me any useful
info.

Relaxation can convert calls to a jal.  I don't know of any open bugs
in this area that can generate relocation errors.  if it is a
relaxation error then turning off relaxation should work around the
problem as you suggested.

A kernel build problem is serious.  I think this is worth a bug
report.  FSF binutils or riscv-gnu-toolchain is fine.

Jim
Atish Patra Oct. 14, 2020, 5:24 a.m. UTC | #7
On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
>
> On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > This happens only when copy_from_user is called from function that is
> > annotated with __init.
> > Adding Kito & Jim for their input
> >
> > @kito, @Jim: Please let me know if I should create a issue in
> > riscv-gnu-toolchain repo or somewhere else.
>
> I can't do anything useful without a testcase that I can use to
> reproduce the problem.  The interactions here are complex, so pointing
> at lines of code or kernel config options doesn't give me any useful
> info.
>
> Relaxation can convert calls to a jal.  I don't know of any open bugs
> in this area that can generate relocation errors.  if it is a
> relaxation error then turning off relaxation should work around the
> problem as you suggested.
>
> A kernel build problem is serious.  I think this is worth a bug
> report.  FSF binutils or riscv-gnu-toolchain is fine.
>

I have created an issue with detailed descriptions and reproduction steps.
Please let me know if you need anything else.

> Jim
Atish Patra Oct. 16, 2020, 6:24 p.m. UTC | #8
On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
> >
> > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > This happens only when copy_from_user is called from function that is
> > > annotated with __init.
> > > Adding Kito & Jim for their input
> > >
> > > @kito, @Jim: Please let me know if I should create a issue in
> > > riscv-gnu-toolchain repo or somewhere else.
> >
> > I can't do anything useful without a testcase that I can use to
> > reproduce the problem.  The interactions here are complex, so pointing
> > at lines of code or kernel config options doesn't give me any useful
> > info.
> >
> > Relaxation can convert calls to a jal.  I don't know of any open bugs
> > in this area that can generate relocation errors.  if it is a
> > relaxation error then turning off relaxation should work around the
> > problem as you suggested.
> >
> > A kernel build problem is serious.  I think this is worth a bug
> > report.  FSF binutils or riscv-gnu-toolchain is fine.
> >
>
> I have created an issue with detailed descriptions and reproduction steps.
> Please let me know if you need anything else.
>

It may be a toolchain issue. Here is the ongoing discussion in case
anybody else is interested.

https://github.com/riscv/riscv-gnu-toolchain/issues/738

> > Jim
>
>
>
> --
> Regards,
> Atish
Atish Patra Oct. 22, 2020, 1:31 a.m. UTC | #9
On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <atishp@atishpatra.org> wrote:
> >
> > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > > This happens only when copy_from_user is called from function that is
> > > > annotated with __init.
> > > > Adding Kito & Jim for their input
> > > >
> > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > riscv-gnu-toolchain repo or somewhere else.
> > >
> > > I can't do anything useful without a testcase that I can use to
> > > reproduce the problem.  The interactions here are complex, so pointing
> > > at lines of code or kernel config options doesn't give me any useful
> > > info.
> > >
> > > Relaxation can convert calls to a jal.  I don't know of any open bugs
> > > in this area that can generate relocation errors.  if it is a
> > > relaxation error then turning off relaxation should work around the
> > > problem as you suggested.
> > >
> > > A kernel build problem is serious.  I think this is worth a bug
> > > report.  FSF binutils or riscv-gnu-toolchain is fine.
> > >
> >
> > I have created an issue with detailed descriptions and reproduction steps.
> > Please let me know if you need anything else.
> >
>
> It may be a toolchain issue. Here is the ongoing discussion in case
> anybody else is interested.
>
> https://github.com/riscv/riscv-gnu-toolchain/issues/738
>
> > > Jim
> >
> >
> >
> > --
> > Regards,
> > Atish
>
>
>
> --
> Regards,
> Atish

Thanks to Jim, we know the cause now. Jim has provided an excellent
analysis of the issue in the github issue report.
https://github.com/riscv/riscv-gnu-toolchain/issues/738

To summarize, the linker relaxation code is not aware of the
alignments between sections.
That's why it relaxes the calls from .text to .init.text and  converts
a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.

There are few ways we can solve this problem.

1. As per Jim's suggestion, linker relaxation code is aware of the
section alignments. We can mark .init.text as a 2MB aligned section.
   For calls within a section, section's alignment will be used in the
calculation. For calls across sections, e.g. from .init.text to .text,
the maximum
   section alignment of every section will be used. Thus, all
relaxation within .init.text and between any sections will be
impacted.
   Thus, it avoids the error but results in the following increase in
size of various sections.
     section           change in size (in bytes)
     .head.text      +4
     .text               +40
     .init.text.        +6530
     .exit.text        +84

The only significant increase is .init.text but it is freed after
boot. Thus, I don't see any significant performance degradation due to
that.

Here is the diff
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -51,7 +51,13 @@ SECTIONS
        . = ALIGN(SECTION_ALIGN);
        __init_begin = .;
        __init_text_begin = .;
-       INIT_TEXT_SECTION(PAGE_SIZE)
+       . = ALIGN(PAGE_SIZE);                                   \
+       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
ALIGN(SECTION_ALIGN) {  \
+               _sinittext = .;                                         \
+               INIT_TEXT                                               \
+               _einittext = .;                                         \
+       }
+
        . = ALIGN(8);
        __soc_early_init_table : {
                __soc_early_init_table_start = .;

2. We will continue to keep head.txt & .init.text together before
.text. However, we will map the pages that contain head & init.text at
page
    granularity so that .head.text and init.text can have different
permissions. I have not measured the performance impact of this but it
won't
    too bad given that the combined size of sections .head.txt &
.init.text is 200K. So we are talking about page level permission only
for
    ~50 pages during boot.

3. Keep head.text in a separate 2MB aligned section. .init.text will
follow .head.text in its own section as well. This increases the
kernel
    size by 2MB for MMU kernels. For nommu case, it will only increase
by 64 bytes due to smaller section alignment for nommu kernels.

Both solutions 1 & 2 come at minimal performance on boot time while
solution 3 comes at increased kernel size.

Any preference ?
Anup Patel Oct. 22, 2020, 5:03 a.m. UTC | #10
On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <atishp@atishpatra.org> wrote:
>
> On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <atishp@atishpatra.org> wrote:
> >
> > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <atishp@atishpatra.org> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > > > This happens only when copy_from_user is called from function that is
> > > > > annotated with __init.
> > > > > Adding Kito & Jim for their input
> > > > >
> > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > riscv-gnu-toolchain repo or somewhere else.
> > > >
> > > > I can't do anything useful without a testcase that I can use to
> > > > reproduce the problem.  The interactions here are complex, so pointing
> > > > at lines of code or kernel config options doesn't give me any useful
> > > > info.
> > > >
> > > > Relaxation can convert calls to a jal.  I don't know of any open bugs
> > > > in this area that can generate relocation errors.  if it is a
> > > > relaxation error then turning off relaxation should work around the
> > > > problem as you suggested.
> > > >
> > > > A kernel build problem is serious.  I think this is worth a bug
> > > > report.  FSF binutils or riscv-gnu-toolchain is fine.
> > > >
> > >
> > > I have created an issue with detailed descriptions and reproduction steps.
> > > Please let me know if you need anything else.
> > >
> >
> > It may be a toolchain issue. Here is the ongoing discussion in case
> > anybody else is interested.
> >
> > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> >
> > > > Jim
> > >
> > >
> > >
> > > --
> > > Regards,
> > > Atish
> >
> >
> >
> > --
> > Regards,
> > Atish
>
> Thanks to Jim, we know the cause now. Jim has provided an excellent
> analysis of the issue in the github issue report.
> https://github.com/riscv/riscv-gnu-toolchain/issues/738
>
> To summarize, the linker relaxation code is not aware of the
> alignments between sections.
> That's why it relaxes the calls from .text to .init.text and  converts
> a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
>
> There are few ways we can solve this problem.
>
> 1. As per Jim's suggestion, linker relaxation code is aware of the
> section alignments. We can mark .init.text as a 2MB aligned section.
>    For calls within a section, section's alignment will be used in the
> calculation. For calls across sections, e.g. from .init.text to .text,
> the maximum
>    section alignment of every section will be used. Thus, all
> relaxation within .init.text and between any sections will be
> impacted.
>    Thus, it avoids the error but results in the following increase in
> size of various sections.
>      section           change in size (in bytes)
>      .head.text      +4
>      .text               +40
>      .init.text.        +6530
>      .exit.text        +84
>
> The only significant increase is .init.text but it is freed after
> boot. Thus, I don't see any significant performance degradation due to
> that.
>
> Here is the diff
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -51,7 +51,13 @@ SECTIONS
>         . = ALIGN(SECTION_ALIGN);
>         __init_begin = .;
>         __init_text_begin = .;
> -       INIT_TEXT_SECTION(PAGE_SIZE)
> +       . = ALIGN(PAGE_SIZE);                                   \
> +       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> ALIGN(SECTION_ALIGN) {  \
> +               _sinittext = .;                                         \
> +               INIT_TEXT                                               \
> +               _einittext = .;                                         \
> +       }
> +
>         . = ALIGN(8);
>         __soc_early_init_table : {
>                 __soc_early_init_table_start = .;
>
> 2. We will continue to keep head.txt & .init.text together before
> .text. However, we will map the pages that contain head & init.text at
> page
>     granularity so that .head.text and init.text can have different
> permissions. I have not measured the performance impact of this but it
> won't
>     too bad given that the combined size of sections .head.txt &
> .init.text is 200K. So we are talking about page level permission only
> for
>     ~50 pages during boot.
>
> 3. Keep head.text in a separate 2MB aligned section. .init.text will
> follow .head.text in its own section as well. This increases the
> kernel
>     size by 2MB for MMU kernels. For nommu case, it will only increase
> by 64 bytes due to smaller section alignment for nommu kernels.
>
> Both solutions 1 & 2 come at minimal performance on boot time while
> solution 3 comes at increased kernel size.
>
> Any preference ?

I prefer solution#3 because:
1. This will help us avoid special handling of static objects
2.  This will make RISC-V linker script more aligned with other
     major architectures

I don't think solution#3 will increase the size of the kernel by 2MB. We
can make head.text part of text section. There will be only one section
alignment between text section and init section but the existing section
alignment between init section and text section will be removed. In other
words, number of section alignments will remain same.

Regards,
Anup
Anup Patel Oct. 22, 2020, 7:22 a.m. UTC | #11
On Thu, Oct 22, 2020 at 10:33 AM Anup Patel <anup@brainfault.org> wrote:
>
> On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <atishp@atishpatra.org> wrote:
> >
> > On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <atishp@atishpatra.org> wrote:
> > >
> > > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
> > > > >
> > > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > > > > This happens only when copy_from_user is called from function that is
> > > > > > annotated with __init.
> > > > > > Adding Kito & Jim for their input
> > > > > >
> > > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > > riscv-gnu-toolchain repo or somewhere else.
> > > > >
> > > > > I can't do anything useful without a testcase that I can use to
> > > > > reproduce the problem.  The interactions here are complex, so pointing
> > > > > at lines of code or kernel config options doesn't give me any useful
> > > > > info.
> > > > >
> > > > > Relaxation can convert calls to a jal.  I don't know of any open bugs
> > > > > in this area that can generate relocation errors.  if it is a
> > > > > relaxation error then turning off relaxation should work around the
> > > > > problem as you suggested.
> > > > >
> > > > > A kernel build problem is serious.  I think this is worth a bug
> > > > > report.  FSF binutils or riscv-gnu-toolchain is fine.
> > > > >
> > > >
> > > > I have created an issue with detailed descriptions and reproduction steps.
> > > > Please let me know if you need anything else.
> > > >
> > >
> > > It may be a toolchain issue. Here is the ongoing discussion in case
> > > anybody else is interested.
> > >
> > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > >
> > > > > Jim
> > > >
> > > >
> > > >
> > > > --
> > > > Regards,
> > > > Atish
> > >
> > >
> > >
> > > --
> > > Regards,
> > > Atish
> >
> > Thanks to Jim, we know the cause now. Jim has provided an excellent
> > analysis of the issue in the github issue report.
> > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> >
> > To summarize, the linker relaxation code is not aware of the
> > alignments between sections.
> > That's why it relaxes the calls from .text to .init.text and  converts
> > a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
> >
> > There are few ways we can solve this problem.
> >
> > 1. As per Jim's suggestion, linker relaxation code is aware of the
> > section alignments. We can mark .init.text as a 2MB aligned section.
> >    For calls within a section, section's alignment will be used in the
> > calculation. For calls across sections, e.g. from .init.text to .text,
> > the maximum
> >    section alignment of every section will be used. Thus, all
> > relaxation within .init.text and between any sections will be
> > impacted.
> >    Thus, it avoids the error but results in the following increase in
> > size of various sections.
> >      section           change in size (in bytes)
> >      .head.text      +4
> >      .text               +40
> >      .init.text.        +6530
> >      .exit.text        +84
> >
> > The only significant increase is .init.text but it is freed after
> > boot. Thus, I don't see any significant performance degradation due to
> > that.
> >
> > Here is the diff
> > --- a/arch/riscv/kernel/vmlinux.lds.S
> > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > @@ -51,7 +51,13 @@ SECTIONS
> >         . = ALIGN(SECTION_ALIGN);
> >         __init_begin = .;
> >         __init_text_begin = .;
> > -       INIT_TEXT_SECTION(PAGE_SIZE)
> > +       . = ALIGN(PAGE_SIZE);                                   \
> > +       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> > ALIGN(SECTION_ALIGN) {  \
> > +               _sinittext = .;                                         \
> > +               INIT_TEXT                                               \
> > +               _einittext = .;                                         \
> > +       }
> > +
> >         . = ALIGN(8);
> >         __soc_early_init_table : {
> >                 __soc_early_init_table_start = .;
> >
> > 2. We will continue to keep head.txt & .init.text together before
> > .text. However, we will map the pages that contain head & init.text at
> > page
> >     granularity so that .head.text and init.text can have different
> > permissions. I have not measured the performance impact of this but it
> > won't
> >     too bad given that the combined size of sections .head.txt &
> > .init.text is 200K. So we are talking about page level permission only
> > for
> >     ~50 pages during boot.
> >
> > 3. Keep head.text in a separate 2MB aligned section. .init.text will
> > follow .head.text in its own section as well. This increases the
> > kernel
> >     size by 2MB for MMU kernels. For nommu case, it will only increase
> > by 64 bytes due to smaller section alignment for nommu kernels.
> >
> > Both solutions 1 & 2 come at minimal performance on boot time while
> > solution 3 comes at increased kernel size.
> >
> > Any preference ?
>
> I prefer solution#3 because:
> 1. This will help us avoid special handling of static objects
> 2.  This will make RISC-V linker script more aligned with other
>      major architectures
>
> I don't think solution#3 will increase the size of the kernel by 2MB. We
> can make head.text part of text section. There will be only one section
> alignment between text section and init section but the existing section
> alignment between init section and text section will be removed. In other
> words, number of section alignments will remain same.

I think we will need to incorporate Jim's suggestion irrespective of the
solution we choose because without Jim's changes we can hit the
linker relaxation issue in solution#2 as well.

Regards,
Anup
Atish Patra Oct. 22, 2020, 5:13 p.m. UTC | #12
On Thu, Oct 22, 2020 at 12:22 AM Anup Patel <anup@brainfault.org> wrote:
>
> On Thu, Oct 22, 2020 at 10:33 AM Anup Patel <anup@brainfault.org> wrote:
> >
> > On Thu, Oct 22, 2020 at 7:01 AM Atish Patra <atishp@atishpatra.org> wrote:
> > >
> > > On Fri, Oct 16, 2020 at 11:24 AM Atish Patra <atishp@atishpatra.org> wrote:
> > > >
> > > > On Tue, Oct 13, 2020 at 10:24 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > > >
> > > > > On Tue, Oct 13, 2020 at 6:21 PM Jim Wilson <jimw@sifive.com> wrote:
> > > > > >
> > > > > > On Tue, Oct 13, 2020 at 3:25 PM Atish Patra <atishp@atishpatra.org> wrote:
> > > > > > > This happens only when copy_from_user is called from function that is
> > > > > > > annotated with __init.
> > > > > > > Adding Kito & Jim for their input
> > > > > > >
> > > > > > > @kito, @Jim: Please let me know if I should create a issue in
> > > > > > > riscv-gnu-toolchain repo or somewhere else.
> > > > > >
> > > > > > I can't do anything useful without a testcase that I can use to
> > > > > > reproduce the problem.  The interactions here are complex, so pointing
> > > > > > at lines of code or kernel config options doesn't give me any useful
> > > > > > info.
> > > > > >
> > > > > > Relaxation can convert calls to a jal.  I don't know of any open bugs
> > > > > > in this area that can generate relocation errors.  if it is a
> > > > > > relaxation error then turning off relaxation should work around the
> > > > > > problem as you suggested.
> > > > > >
> > > > > > A kernel build problem is serious.  I think this is worth a bug
> > > > > > report.  FSF binutils or riscv-gnu-toolchain is fine.
> > > > > >
> > > > >
> > > > > I have created an issue with detailed descriptions and reproduction steps.
> > > > > Please let me know if you need anything else.
> > > > >
> > > >
> > > > It may be a toolchain issue. Here is the ongoing discussion in case
> > > > anybody else is interested.
> > > >
> > > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > > >
> > > > > > Jim
> > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Regards,
> > > > > Atish
> > > >
> > > >
> > > >
> > > > --
> > > > Regards,
> > > > Atish
> > >
> > > Thanks to Jim, we know the cause now. Jim has provided an excellent
> > > analysis of the issue in the github issue report.
> > > https://github.com/riscv/riscv-gnu-toolchain/issues/738
> > >
> > > To summarize, the linker relaxation code is not aware of the
> > > alignments between sections.
> > > That's why it relaxes the calls from .text to .init.text and  converts
> > > a auipc+jalr pair to jal even if the address can't be fit +/- 1MB.
> > >
> > > There are few ways we can solve this problem.
> > >
> > > 1. As per Jim's suggestion, linker relaxation code is aware of the
> > > section alignments. We can mark .init.text as a 2MB aligned section.
> > >    For calls within a section, section's alignment will be used in the
> > > calculation. For calls across sections, e.g. from .init.text to .text,
> > > the maximum
> > >    section alignment of every section will be used. Thus, all
> > > relaxation within .init.text and between any sections will be
> > > impacted.
> > >    Thus, it avoids the error but results in the following increase in
> > > size of various sections.
> > >      section           change in size (in bytes)
> > >      .head.text      +4
> > >      .text               +40
> > >      .init.text.        +6530
> > >      .exit.text        +84
> > >
> > > The only significant increase is .init.text but it is freed after
> > > boot. Thus, I don't see any significant performance degradation due to
> > > that.
> > >
> > > Here is the diff
> > > --- a/arch/riscv/kernel/vmlinux.lds.S
> > > +++ b/arch/riscv/kernel/vmlinux.lds.S
> > > @@ -51,7 +51,13 @@ SECTIONS
> > >         . = ALIGN(SECTION_ALIGN);
> > >         __init_begin = .;
> > >         __init_text_begin = .;
> > > -       INIT_TEXT_SECTION(PAGE_SIZE)
> > > +       . = ALIGN(PAGE_SIZE);                                   \
> > > +       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
> > > ALIGN(SECTION_ALIGN) {  \
> > > +               _sinittext = .;                                         \
> > > +               INIT_TEXT                                               \
> > > +               _einittext = .;                                         \
> > > +       }
> > > +
> > >         . = ALIGN(8);
> > >         __soc_early_init_table : {
> > >                 __soc_early_init_table_start = .;
> > >
> > > 2. We will continue to keep head.txt & .init.text together before
> > > .text. However, we will map the pages that contain head & init.text at
> > > page
> > >     granularity so that .head.text and init.text can have different
> > > permissions. I have not measured the performance impact of this but it
> > > won't
> > >     too bad given that the combined size of sections .head.txt &
> > > .init.text is 200K. So we are talking about page level permission only
> > > for
> > >     ~50 pages during boot.
> > >
> > > 3. Keep head.text in a separate 2MB aligned section. .init.text will
> > > follow .head.text in its own section as well. This increases the
> > > kernel
> > >     size by 2MB for MMU kernels. For nommu case, it will only increase
> > > by 64 bytes due to smaller section alignment for nommu kernels.
> > >
> > > Both solutions 1 & 2 come at minimal performance on boot time while
> > > solution 3 comes at increased kernel size.
> > >
> > > Any preference ?
> >
> > I prefer solution#3 because:
> > 1. This will help us avoid special handling of static objects
> > 2.  This will make RISC-V linker script more aligned with other
> >      major architectures
> >
> > I don't think solution#3 will increase the size of the kernel by 2MB. We
> > can make head.text part of text section. There will be only one section
> > alignment between text section and init section but the existing section
> > alignment between init section and text section will be removed. In other
> > words, number of section alignments will remain same.
>
> I think we will need to incorporate Jim's suggestion irrespective of the
> solution we choose because without Jim's changes we can hit the
> linker relaxation issue in solution#2 as well.
>

Reconsidering all the possible approaches, I think you are right.

With approach 2 & 3, I just hit the issue now. However, there is an
alignment between
sections which may throw the same linker error in future when text or
init.text gets bigger.

I will follow approach 1 suggested by Jim to revise the series.

> Regards,
> Anup
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h
index d60802bfafbc..730d2c4a844d 100644
--- a/arch/riscv/include/asm/sections.h
+++ b/arch/riscv/include/asm/sections.h
@@ -10,6 +10,8 @@ 
 #include <asm-generic/sections.h>
 extern char _start[];
 extern char _start_kernel[];
+extern char __init_data_begin[], __init_data_end[];
+extern char __init_text_begin[], __init_text_end[];
 
 extern bool init_mem_is_free;
 
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4cc3a4e2afd3..913429c9c1ae 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -15,6 +15,7 @@  int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_default(unsigned long addr, int numpages);
 void protect_kernel_text_data(void);
 #else
 static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
@@ -22,6 +23,7 @@  static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 static inline void protect_kernel_text_data(void) {};
+static inline int set_memory_default(unsigned long addr, int numpages) { return 0; }
 #endif
 
 int set_direct_map_invalid_noflush(struct page *page);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 4176a2affd1d..b8a35ef0eab0 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -129,6 +129,10 @@  bool init_mem_is_free = false;
 
 void free_initmem(void)
 {
+	unsigned long init_begin = (unsigned long)__init_begin;
+	unsigned long init_end = (unsigned long)__init_end;
+
+	set_memory_default(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
 	free_initmem_default(POISON_FREE_INITMEM);
 	init_mem_is_free = true;
 }
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 0807633f0dc8..15b9882588ae 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -30,8 +30,8 @@  SECTIONS
 	. = ALIGN(PAGE_SIZE);
 
 	__init_begin = .;
+	__init_text_begin = .;
 	INIT_TEXT_SECTION(PAGE_SIZE)
-	INIT_DATA_SECTION(16)
 	. = ALIGN(8);
 	__soc_early_init_table : {
 		__soc_early_init_table_start = .;
@@ -48,11 +48,19 @@  SECTIONS
 	{
 		EXIT_TEXT
 	}
+
+	__init_text_end = .;
+	. = ALIGN(SECTION_ALIGN);
+	/* Start of init data section */
+	__init_data_begin = .;
+	INIT_DATA_SECTION(16)
 	.exit.data :
 	{
 		EXIT_DATA
 	}
 	PERCPU_SECTION(L1_CACHE_BYTES)
+
+	__init_data_end = .;
 	__init_end = .;
 
 	. = ALIGN(SECTION_ALIGN);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 7859a1d1b34d..3ef0eafcc7c7 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -627,11 +627,17 @@  void protect_kernel_text_data(void)
 {
 	unsigned long text_start = (unsigned long)_text;
 	unsigned long text_end = (unsigned long)_etext;
+	unsigned long init_text_start = (unsigned long)__init_text_begin;
+	unsigned long init_text_end = (unsigned long)__init_text_end;
+	unsigned long init_data_start = (unsigned long)__init_data_begin;
+	unsigned long init_data_end = (unsigned long)__init_data_end;
 	unsigned long rodata_start = (unsigned long)__start_rodata;
 	unsigned long data_start = (unsigned long)_data;
 	unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
 
+	set_memory_ro(init_text_start, (init_text_end - init_text_start) >> PAGE_SHIFT);
 	set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+	set_memory_nx(init_data_start, (init_data_end - init_data_start) >> PAGE_SHIFT);
 	set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
 	set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
 }
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 19fecb362d81..aecedaf086ab 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -128,6 +128,12 @@  static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
 	return ret;
 }
 
+int set_memory_default(unsigned long addr, int numpages)
+{
+	return __set_memory(addr, numpages, __pgprot(_PAGE_KERNEL | _PAGE_EXEC),
+			    __pgprot(0));
+}
+
 int set_memory_ro(unsigned long addr, int numpages)
 {
 	return __set_memory(addr, numpages, __pgprot(_PAGE_READ),