Message ID | 20230727212647.4182407-2-charlie@rivosinc.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | RISC-V: mm: Make SV48 the default address space | expand |
On 27/07/2023 23:26, Charlie Jenkins wrote: > Make sv48 the default address space for mmap as some applications > currently depend on this assumption. A hint address passed to mmap will > cause the largest address space that fits entirely into the hint to be > used. If the hint is less than or equal to 1<<38, an sv39 address will > be used. An exception is that if the hint address is 0, then a sv48 > address will be used. After an address space is completely full, the next > smallest address space will be used. > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> > --- > arch/riscv/include/asm/elf.h | 2 +- > arch/riscv/include/asm/pgtable.h | 20 +++++++++++- > arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++---- > 3 files changed, 66 insertions(+), 8 deletions(-) > > diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h > index c24280774caf..5d3368d5585c 100644 > --- a/arch/riscv/include/asm/elf.h > +++ b/arch/riscv/include/asm/elf.h > @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); > * the loader. We need to make sure that it is out of the way of the program > * that it will "exec", and that there is sufficient room for the brk. > */ > -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) > +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) > > #ifdef CONFIG_64BIT > #ifdef CONFIG_COMPAT > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 75970ee2bda2..c76a1ef094a4 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -63,8 +63,26 @@ > * position vmemmap directly below the VMALLOC region. > */ > #ifdef CONFIG_64BIT > +#define VA_BITS_SV39 39 > +#define VA_BITS_SV48 48 > +#define VA_BITS_SV57 57 > + > +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) > +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) > +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) > + > #define VA_BITS (pgtable_l5_enabled ? \ > - 57 : (pgtable_l4_enabled ? 48 : 39)) > + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) > + > +#ifdef CONFIG_COMPAT > +#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) > +#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) Here the condition is always true right? > +#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64) > +#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64) I think you should use is_compat_task() here instead of test_thread_flag(TIF_32BIT). And what about introducing VA_BITS_SV32 instead of hardcoding 32? > +#else > +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) > +#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) Ditto here. > +#endif > #else > #define VA_BITS 32 > #endif > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index c950a8d9edef..e810244ea951 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -13,19 +13,59 @@ > > #include <asm/ptrace.h> > > +#ifdef CONFIG_64BIT > +#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > +#define STACK_TOP_MAX TASK_SIZE_64 > + > +#define arch_get_mmap_end(addr, len, flags) \ > +({ \ > + unsigned long mmap_end; \ > + typeof(addr) _addr = (addr); \ > + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ > + mmap_end = DEFAULT_MAP_WINDOW; \ Wouldn't that prevent a sv57 system to allocate sv57 addresses when sv48 is full unless explicitly asked? > + else if ((_addr) >= VA_USER_SV57) \ > + mmap_end = STACK_TOP_MAX; \ > + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ > + mmap_end = VA_USER_SV48; \ > + else \ > + mmap_end = VA_USER_SV39; \ > + mmap_end; \ > +}) > + > +#define arch_get_mmap_base(addr, base) \ > +({ \ > + unsigned long mmap_base; \ > + typeof(addr) _addr = (addr); \ > + typeof(base) _base = (base); \ > + unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \ > + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ > + mmap_base = (_base); \ > + else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \ > + mmap_base = VA_USER_SV57 + rnd_gap; \ Shouldn't it be mmap_base = VA_USER_SV57 - rnd_gap? > + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ > + mmap_base = VA_USER_SV48 + rnd_gap; \ > + else \ > + mmap_base = VA_USER_SV39 + rnd_gap; \ > + mmap_base; \ > +}) > + > +#else > +#define DEFAULT_MAP_WINDOW TASK_SIZE > +#define STACK_TOP_MAX TASK_SIZE > +#endif > +#define STACK_ALIGN 16 > + > +#define STACK_TOP DEFAULT_MAP_WINDOW > + > /* > * This decides where the kernel will search for a free chunk of vm > * space during mmap's. > */ > -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) > - > -#define STACK_TOP TASK_SIZE > #ifdef CONFIG_64BIT > -#define STACK_TOP_MAX TASK_SIZE_64 > +#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3) > #else > -#define STACK_TOP_MAX TASK_SIZE > +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) > #endif > -#define STACK_ALIGN 16 > > #ifndef __ASSEMBLY__ >
On Sun, Aug 06, 2023 at 11:53:51AM +0200, Alexandre Ghiti wrote: > > On 27/07/2023 23:26, Charlie Jenkins wrote: > > Make sv48 the default address space for mmap as some applications > > currently depend on this assumption. A hint address passed to mmap will > > cause the largest address space that fits entirely into the hint to be > > used. If the hint is less than or equal to 1<<38, an sv39 address will > > be used. An exception is that if the hint address is 0, then a sv48 > > address will be used. After an address space is completely full, the next > > smallest address space will be used. > > > > Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> > > --- > > arch/riscv/include/asm/elf.h | 2 +- > > arch/riscv/include/asm/pgtable.h | 20 +++++++++++- > > arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++---- > > 3 files changed, 66 insertions(+), 8 deletions(-) > > > > diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h > > index c24280774caf..5d3368d5585c 100644 > > --- a/arch/riscv/include/asm/elf.h > > +++ b/arch/riscv/include/asm/elf.h > > @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); > > * the loader. We need to make sure that it is out of the way of the program > > * that it will "exec", and that there is sufficient room for the brk. > > */ > > -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) > > +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) > > #ifdef CONFIG_64BIT > > #ifdef CONFIG_COMPAT > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > > index 75970ee2bda2..c76a1ef094a4 100644 > > --- a/arch/riscv/include/asm/pgtable.h > > +++ b/arch/riscv/include/asm/pgtable.h > > @@ -63,8 +63,26 @@ > > * position vmemmap directly below the VMALLOC region. > > */ > > #ifdef CONFIG_64BIT > > +#define VA_BITS_SV39 39 > > +#define VA_BITS_SV48 48 > > +#define VA_BITS_SV57 57 > > + > > +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) > > +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) > > +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) > > + > > #define VA_BITS (pgtable_l5_enabled ? \ > > - 57 : (pgtable_l4_enabled ? 48 : 39)) > > + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) > > + > > +#ifdef CONFIG_COMPAT > > +#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) > > +#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) > > > Here the condition is always true right? > Yes, that condition is always true, I can eliminate the conditional. > > > +#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64) > > +#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64) > > > I think you should use is_compat_task() here instead of > test_thread_flag(TIF_32BIT). And what about introducing VA_BITS_SV32 instead > of hardcoding 32? > Sounds good. > > > +#else > > +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) > > +#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) > > > Ditto here. > > > > +#endif > > #else > > #define VA_BITS 32 > > #endif > > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > > index c950a8d9edef..e810244ea951 100644 > > --- a/arch/riscv/include/asm/processor.h > > +++ b/arch/riscv/include/asm/processor.h > > @@ -13,19 +13,59 @@ > > #include <asm/ptrace.h> > > +#ifdef CONFIG_64BIT > > +#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) > > +#define STACK_TOP_MAX TASK_SIZE_64 > > + > > +#define arch_get_mmap_end(addr, len, flags) \ > > +({ \ > > + unsigned long mmap_end; \ > > + typeof(addr) _addr = (addr); \ > > + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ > > + mmap_end = DEFAULT_MAP_WINDOW; \ > > > Wouldn't that prevent a sv57 system to allocate sv57 addresses when sv48 is > full unless explicitly asked? > Yes that is a good point, that should be STACK_TOP_MAX as well. > > > + else if ((_addr) >= VA_USER_SV57) \ > > + mmap_end = STACK_TOP_MAX; \ > > + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ > > + mmap_end = VA_USER_SV48; \ > > + else \ > > + mmap_end = VA_USER_SV39; \ > > + mmap_end; \ > > +}) > > + > > +#define arch_get_mmap_base(addr, base) \ > > +({ \ > > + unsigned long mmap_base; \ > > + typeof(addr) _addr = (addr); \ > > + typeof(base) _base = (base); \ > > + unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \ > > + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ > > + mmap_base = (_base); \ > > + else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \ > > + mmap_base = VA_USER_SV57 + rnd_gap; \ > > > Shouldn't it be mmap_base = VA_USER_SV57 - rnd_gap? > No, rnd_gap is a negative number here. 'base' is equal to DEFAULT_MAP_WINDOW - gap - rnd. It does seem more clear if it is a positive number so I will set rnd_gap to DEFAULT_MAP_WINDOW - (_base). > > > + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ > > + mmap_base = VA_USER_SV48 + rnd_gap; \ > > + else \ > > + mmap_base = VA_USER_SV39 + rnd_gap; \ > > + mmap_base; \ > > +}) > > + > > +#else > > +#define DEFAULT_MAP_WINDOW TASK_SIZE > > +#define STACK_TOP_MAX TASK_SIZE > > +#endif > > +#define STACK_ALIGN 16 > > + > > +#define STACK_TOP DEFAULT_MAP_WINDOW > > + > > /* > > * This decides where the kernel will search for a free chunk of vm > > * space during mmap's. > > */ > > -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) > > - > > -#define STACK_TOP TASK_SIZE > > #ifdef CONFIG_64BIT > > -#define STACK_TOP_MAX TASK_SIZE_64 > > +#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3) > > #else > > -#define STACK_TOP_MAX TASK_SIZE > > +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) > > #endif > > -#define STACK_ALIGN 16 > > #ifndef __ASSEMBLY__
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index c24280774caf..5d3368d5585c 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2) +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2) #ifdef CONFIG_64BIT #ifdef CONFIG_COMPAT diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 75970ee2bda2..c76a1ef094a4 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -63,8 +63,26 @@ * position vmemmap directly below the VMALLOC region. */ #ifdef CONFIG_64BIT +#define VA_BITS_SV39 39 +#define VA_BITS_SV48 48 +#define VA_BITS_SV57 57 + +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1)) +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1)) +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1)) + #define VA_BITS (pgtable_l5_enabled ? \ - 57 : (pgtable_l4_enabled ? 48 : 39)) + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39)) + +#ifdef CONFIG_COMPAT +#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) +#define MMAP_MIN_VA_BITS_64 ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) +#define MMAP_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_VA_BITS_64) +#define MMAP_MIN_VA_BITS (test_thread_flag(TIF_32BIT) ? 32 : MMAP_MIN_VA_BITS_64) +#else +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS) +#define MMAP_MIN_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS) +#endif #else #define VA_BITS 32 #endif diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index c950a8d9edef..e810244ea951 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -13,19 +13,59 @@ #include <asm/ptrace.h> +#ifdef CONFIG_64BIT +#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1)) +#define STACK_TOP_MAX TASK_SIZE_64 + +#define arch_get_mmap_end(addr, len, flags) \ +({ \ + unsigned long mmap_end; \ + typeof(addr) _addr = (addr); \ + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ + mmap_end = DEFAULT_MAP_WINDOW; \ + else if ((_addr) >= VA_USER_SV57) \ + mmap_end = STACK_TOP_MAX; \ + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ + mmap_end = VA_USER_SV48; \ + else \ + mmap_end = VA_USER_SV39; \ + mmap_end; \ +}) + +#define arch_get_mmap_base(addr, base) \ +({ \ + unsigned long mmap_base; \ + typeof(addr) _addr = (addr); \ + typeof(base) _base = (base); \ + unsigned long rnd_gap = (_base) - DEFAULT_MAP_WINDOW; \ + if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_32BIT))) \ + mmap_base = (_base); \ + else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \ + mmap_base = VA_USER_SV57 + rnd_gap; \ + else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \ + mmap_base = VA_USER_SV48 + rnd_gap; \ + else \ + mmap_base = VA_USER_SV39 + rnd_gap; \ + mmap_base; \ +}) + +#else +#define DEFAULT_MAP_WINDOW TASK_SIZE +#define STACK_TOP_MAX TASK_SIZE +#endif +#define STACK_ALIGN 16 + +#define STACK_TOP DEFAULT_MAP_WINDOW + /* * This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) - -#define STACK_TOP TASK_SIZE #ifdef CONFIG_64BIT -#define STACK_TOP_MAX TASK_SIZE_64 +#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3) #else -#define STACK_TOP_MAX TASK_SIZE +#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3) #endif -#define STACK_ALIGN 16 #ifndef __ASSEMBLY__
Make sv48 the default address space for mmap as some applications currently depend on this assumption. A hint address passed to mmap will cause the largest address space that fits entirely into the hint to be used. If the hint is less than or equal to 1<<38, an sv39 address will be used. An exception is that if the hint address is 0, then a sv48 address will be used. After an address space is completely full, the next smallest address space will be used. Signed-off-by: Charlie Jenkins <charlie@rivosinc.com> --- arch/riscv/include/asm/elf.h | 2 +- arch/riscv/include/asm/pgtable.h | 20 +++++++++++- arch/riscv/include/asm/processor.h | 52 ++++++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 8 deletions(-)