diff mbox series

[1/2] RISC-V: mm: Restrict address space for sv39,sv48,sv57

Message ID 20230627222152.177716-2-charlie@rivosinc.com (mailing list archive)
State New
Headers show
Series Make SV39 the default address space | expand

Commit Message

Charlie Jenkins June 27, 2023, 10:21 p.m. UTC
Make sv39 the default address space for mmap as some applications
currently depend on this assumption. The RISC-V specification enforces
that bits outside of the virtual address range are not used, so
restricting the size of the default address space as such should be
temporary. A hint address passed to mmap will cause the largest address
space that fits entirely into the hint to be used. If the hint is less
than or equal to 1<<38, a 39-bit address will be used. After an address
space is completely full, the next smallest address space will be used.

Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
---
 arch/riscv/include/asm/elf.h       |  2 +-
 arch/riscv/include/asm/pgtable.h   | 13 +++++++++-
 arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++-----
 3 files changed, 47 insertions(+), 9 deletions(-)

Comments

Jessica Clarke June 27, 2023, 10:32 p.m. UTC | #1
On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote:
> 
> Make sv39 the default address space for mmap as some applications
> currently depend on this assumption.

They are just plain wrong too. Sv48 was in even Priv v1.10 (the first
spec where satp was named as such and contained the mode, rather than
requiring M-mode’s help in configuring virtual memory), predating the
ratified v1.11 spec. A 39-bit address space is pathetic and has
implications for ASLR.

I strongly suggest applications be forced to support at least Sv48,
which is totally reasonable given the address space sizes used by other
architectures. Sv57 is more disruptive to some runtimes, though ideally
even that would be free for the kernel to use rather than committing to
not using it for the default uABI.

Jess

> The RISC-V specification enforces
> that bits outside of the virtual address range are not used, so
> restricting the size of the default address space as such should be
> temporary. A hint address passed to mmap will cause the largest address
> space that fits entirely into the hint to be used. If the hint is less
> than or equal to 1<<38, a 39-bit address will be used. After an address
> space is completely full, the next smallest address space will be used.
> 
> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
> ---
> arch/riscv/include/asm/elf.h       |  2 +-
> arch/riscv/include/asm/pgtable.h   | 13 +++++++++-
> arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++-----
> 3 files changed, 47 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
> index 30e7d2455960..1b57f13a1afd 100644
> --- a/arch/riscv/include/asm/elf.h
> +++ b/arch/riscv/include/asm/elf.h
> @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
>  * the loader.  We need to make sure that it is out of the way of the program
>  * that it will "exec", and that there is sufficient room for the brk.
>  */
> -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
> +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
> 
> #ifdef CONFIG_64BIT
> #ifdef CONFIG_COMPAT
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 75970ee2bda2..e83912e97870 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -57,18 +57,29 @@
> #define MODULES_END (PFN_ALIGN((unsigned long)&_start))
> #endif
> 
> +
> /*
>  * Roughly size the vmemmap space to be large enough to fit enough
>  * struct pages to map half the virtual address space. Then
>  * position vmemmap directly below the VMALLOC region.
>  */
> #ifdef CONFIG_64BIT
> +#define VA_BITS_SV39 39
> +#define VA_BITS_SV48 48
> +#define VA_BITS_SV57 57
> +
> +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
> +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
> +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
> +
> #define VA_BITS (pgtable_l5_enabled ? \
> - 57 : (pgtable_l4_enabled ? 48 : 39))
> + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
> #else
> #define VA_BITS 32
> #endif
> 
> +#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
> +
> #define VMEMMAP_SHIFT \
> (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
> #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index 6fb8bbec8459..019dcd4ecae4 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -12,20 +12,47 @@
> 
> #include <asm/ptrace.h>
> 
> -/*
> - * This decides where the kernel will search for a free chunk of vm
> - * space during mmap's.
> - */
> -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
> -
> -#define STACK_TOP TASK_SIZE
> #ifdef CONFIG_64BIT
> +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1))
> #define STACK_TOP_MAX TASK_SIZE_64
> +
> +#define arch_get_mmap_end(addr, len, flags) \
> + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX :   \
> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
> + VA_USER_SV48 : \
> + VA_USER_SV39)
> +
> +#define arch_get_mmap_base(addr, base) \
> + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ?   \
> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
> + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \
> + base)
> +
> #else
> +#define DEFAULT_MAP_WINDOW TASK_SIZE
> #define STACK_TOP_MAX TASK_SIZE
> +
> +#define arch_get_mmap_end(addr, len, flags) \
> + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW)
> +
> +#define arch_get_mmap_base(addr, base) \
> + ((addr > DEFAULT_MAP_WINDOW) ? \
> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
> + base)
> +
> #endif
> #define STACK_ALIGN 16
> 
> +
> +#define STACK_TOP DEFAULT_MAP_WINDOW
> +
> +/*
> + * This decides where the kernel will search for a free chunk of vm
> + * space during mmap's.
> + */
> +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
> +
> #ifndef __ASSEMBLY__
> 
> struct task_struct;
> -- 
> 2.34.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Palmer Dabbelt June 27, 2023, 11:36 p.m. UTC | #2
On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote:
> On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote:
>> 
>> Make sv39 the default address space for mmap as some applications
>> currently depend on this assumption.
>
> They are just plain wrong too. Sv48 was in even Priv v1.10 (the first
> spec where satp was named as such and contained the mode, rather than
> requiring M-mode’s help in configuring virtual memory), predating the
> ratified v1.11 spec. A 39-bit address space is pathetic and has
> implications for ASLR.
>
> I strongly suggest applications be forced to support at least Sv48,
> which is totally reasonable given the address space sizes used by other
> architectures. Sv57 is more disruptive to some runtimes, though ideally
> even that would be free for the kernel to use rather than committing to
> not using it for the default uABI.

Go and OpenJDK both broke when we expanded the VA width.  I don't like 
it either, but if the change breaks userspace then it's a regression and 
we have to live with the bug.

> Jess
>
>> The RISC-V specification enforces
>> that bits outside of the virtual address range are not used, so
>> restricting the size of the default address space as such should be
>> temporary. A hint address passed to mmap will cause the largest address
>> space that fits entirely into the hint to be used. If the hint is less
>> than or equal to 1<<38, a 39-bit address will be used. After an address
>> space is completely full, the next smallest address space will be used.
>> 
>> Signed-off-by: Charlie Jenkins <charlie@rivosinc.com>
>> ---
>> arch/riscv/include/asm/elf.h       |  2 +-
>> arch/riscv/include/asm/pgtable.h   | 13 +++++++++-
>> arch/riscv/include/asm/processor.h | 41 +++++++++++++++++++++++++-----
>> 3 files changed, 47 insertions(+), 9 deletions(-)
>> 
>> diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
>> index 30e7d2455960..1b57f13a1afd 100644
>> --- a/arch/riscv/include/asm/elf.h
>> +++ b/arch/riscv/include/asm/elf.h
>> @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
>>  * the loader.  We need to make sure that it is out of the way of the program
>>  * that it will "exec", and that there is sufficient room for the brk.
>>  */
>> -#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
>> +#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)
>> 
>> #ifdef CONFIG_64BIT
>> #ifdef CONFIG_COMPAT
>> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
>> index 75970ee2bda2..e83912e97870 100644
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>> @@ -57,18 +57,29 @@
>> #define MODULES_END (PFN_ALIGN((unsigned long)&_start))
>> #endif
>> 
>> +
>> /*
>>  * Roughly size the vmemmap space to be large enough to fit enough
>>  * struct pages to map half the virtual address space. Then
>>  * position vmemmap directly below the VMALLOC region.
>>  */
>> #ifdef CONFIG_64BIT
>> +#define VA_BITS_SV39 39
>> +#define VA_BITS_SV48 48
>> +#define VA_BITS_SV57 57
>> +
>> +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
>> +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
>> +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
>> +
>> #define VA_BITS (pgtable_l5_enabled ? \
>> - 57 : (pgtable_l4_enabled ? 48 : 39))
>> + VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
>> #else
>> #define VA_BITS 32
>> #endif
>> 
>> +#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
>> +
>> #define VMEMMAP_SHIFT \
>> (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
>> #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
>> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
>> index 6fb8bbec8459..019dcd4ecae4 100644
>> --- a/arch/riscv/include/asm/processor.h
>> +++ b/arch/riscv/include/asm/processor.h
>> @@ -12,20 +12,47 @@
>> 
>> #include <asm/ptrace.h>
>> 
>> -/*
>> - * This decides where the kernel will search for a free chunk of vm
>> - * space during mmap's.
>> - */
>> -#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
>> -
>> -#define STACK_TOP TASK_SIZE
>> #ifdef CONFIG_64BIT
>> +#define DEFAULT_MAP_WINDOW (UL(1) << (DEFAULT_VA_BITS - 1))
>> #define STACK_TOP_MAX TASK_SIZE_64
>> +
>> +#define arch_get_mmap_end(addr, len, flags) \
>> + ((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX :   \
>> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
>> + VA_USER_SV48 : \
>> + VA_USER_SV39)
>> +
>> +#define arch_get_mmap_base(addr, base) \
>> + (((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ?   \
>> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
>> + (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
>> + base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \
>> + base)
>> +
>> #else
>> +#define DEFAULT_MAP_WINDOW TASK_SIZE
>> #define STACK_TOP_MAX TASK_SIZE
>> +
>> +#define arch_get_mmap_end(addr, len, flags) \
>> + ((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW)
>> +
>> +#define arch_get_mmap_base(addr, base) \
>> + ((addr > DEFAULT_MAP_WINDOW) ? \
>> + base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
>> + base)
>> +
>> #endif
>> #define STACK_ALIGN 16
>> 
>> +
>> +#define STACK_TOP DEFAULT_MAP_WINDOW
>> +
>> +/*
>> + * This decides where the kernel will search for a free chunk of vm
>> + * space during mmap's.
>> + */
>> +#define TASK_UNMAPPED_BASE PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
>> +
>> #ifndef __ASSEMBLY__
>> 
>> struct task_struct;
>> -- 
>> 2.34.1
>> 
>> 
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv
Charlie Jenkins June 27, 2023, 11:38 p.m. UTC | #3
Yes it is small to have a default of 38-bits of userspace. I would be
interesting in the opinions of other people on whether it would be
acceptable to have the default be sv48 and require applications that
prefer fewer bits to specify so with the given mmap hinting.
Nick Kossifidis June 28, 2023, 7:44 a.m. UTC | #4
On 6/28/23 02:36, Palmer Dabbelt wrote:
> On Tue, 27 Jun 2023 15:32:36 PDT (-0700), jrtc27@jrtc27.com wrote:
>> On 27 Jun 2023, at 23:21, Charlie Jenkins <charlie@rivosinc.com> wrote:
>>>
>>> Make sv39 the default address space for mmap as some applications
>>> currently depend on this assumption.
>>
>> They are just plain wrong too. Sv48 was in even Priv v1.10 (the first
>> spec where satp was named as such and contained the mode, rather than
>> requiring M-mode’s help in configuring virtual memory), predating the
>> ratified v1.11 spec. A 39-bit address space is pathetic and has
>> implications for ASLR.
>>
>> I strongly suggest applications be forced to support at least Sv48,
>> which is totally reasonable given the address space sizes used by other
>> architectures. Sv57 is more disruptive to some runtimes, though ideally
>> even that would be free for the kernel to use rather than committing to
>> not using it for the default uABI.
> 
> Go and OpenJDK both broke when we expanded the VA width.  I don't like 
> it either, but if the change breaks userspace then it's a regression and 
> we have to live with the bug.
> 

Have we debugged this ? do we at least know why they break ? Just 
disabling Sv48/57 by default for everyone because some userspace apps 
break doesn't seem the correct approach, it seems more like a bug in 
userspace IMHO.
Anup Patel June 28, 2023, 12:34 p.m. UTC | #5
On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
>
> Yes it is small to have a default of 38-bits of userspace. I would be
> interesting in the opinions of other people on whether it would be
> acceptable to have the default be sv48 and require applications that
> prefer fewer bits to specify so with the given mmap hinting.

I think sv48 is a reasonable default instead of sv39. We should fallback
to sv39 only if the underlying host does not support sv48.

Regards,
Anup
Charlie Jenkins June 29, 2023, 1:38 a.m. UTC | #6
On Wed, Jun 28, 2023 at 06:04:41PM +0530, Anup Patel wrote:
> On Wed, Jun 28, 2023 at 5:09 AM Charlie Jenkins <charlie@rivosinc.com> wrote:
> >
> > Yes it is small to have a default of 38-bits of userspace. I would be
> > interesting in the opinions of other people on whether it would be
> > acceptable to have the default be sv48 and require applications that
> > prefer fewer bits to specify so with the given mmap hinting.
> 
> I think sv48 is a reasonable default instead of sv39. We should fallback
> to sv39 only if the underlying host does not support sv48.
> 
> Regards,
> Anup

I did some research and it appears that Java does work on sv48, but not
on sv57. Using the v6.4 kernel I was able to successfully run OpenJDK on
both sv38 and sv48, but on sv57 there is a SIGSEGV error on QEMU.
Relevant JDK discussion can be seen here 
https://mail.openjdk.org/pipermail/hotspot-dev/2022-November/067298.html.
Go similarly appears to work even on sv57 according to https://go-review.googlesource.com/c/go/+/409055.
I have not tried Go myself. 

The point of contention here I believe is that in v6.4, the highest
address space available will be used, causing all of these applications
that do not work properly in sv57 to fail when testing in sv57
environments. Given that these applications seem to work in sv48, it
seems reasonable to default to sv48, unless there are an abundance of
additional applications that are unhappy with this.

Using the hint mechanism to mmap will then allow users to change the
address space to sv57 if required. It should be possible to allow users
to use sv38 if they need it using the same mechanism, but reducing the
address space instead of growing it will require more thought from me to
implement.

Thanks,
Charlie
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index 30e7d2455960..1b57f13a1afd 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -49,7 +49,7 @@  extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
  * the loader.  We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-#define ELF_ET_DYN_BASE		((TASK_SIZE / 3) * 2)
+#define ELF_ET_DYN_BASE		((DEFAULT_MAP_WINDOW / 3) * 2)
 
 #ifdef CONFIG_64BIT
 #ifdef CONFIG_COMPAT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 75970ee2bda2..e83912e97870 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -57,18 +57,29 @@ 
 #define MODULES_END		(PFN_ALIGN((unsigned long)&_start))
 #endif
 
+
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
  * struct pages to map half the virtual address space. Then
  * position vmemmap directly below the VMALLOC region.
  */
 #ifdef CONFIG_64BIT
+#define VA_BITS_SV39 39
+#define VA_BITS_SV48 48
+#define VA_BITS_SV57 57
+
+#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
+#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
+#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
+
 #define VA_BITS		(pgtable_l5_enabled ? \
-				57 : (pgtable_l4_enabled ? 48 : 39))
+				VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
 #else
 #define VA_BITS		32
 #endif
 
+#define DEFAULT_VA_BITS ((VA_BITS >= VA_BITS_SV39) ? VA_BITS_SV39 : VA_BITS)
+
 #define VMEMMAP_SHIFT \
 	(VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
 #define VMEMMAP_SIZE	BIT(VMEMMAP_SHIFT)
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 6fb8bbec8459..019dcd4ecae4 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -12,20 +12,47 @@ 
 
 #include <asm/ptrace.h>
 
-/*
- * This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE	PAGE_ALIGN(TASK_SIZE / 3)
-
-#define STACK_TOP		TASK_SIZE
 #ifdef CONFIG_64BIT
+#define DEFAULT_MAP_WINDOW	(UL(1) << (DEFAULT_VA_BITS - 1))
 #define STACK_TOP_MAX		TASK_SIZE_64
+
+#define arch_get_mmap_end(addr, len, flags) \
+	((addr) == 0 || (addr) >= VA_USER_SV57 ? STACK_TOP_MAX :   \
+	 (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
+						 VA_USER_SV48 : \
+						 VA_USER_SV39)
+
+#define arch_get_mmap_base(addr, base) \
+	(((addr >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) ?   \
+		 base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
+	 (((addr) >= VA_USER_SV48) && (VA_BITS >= VA_BITS_SV48)) ? \
+		 base + VA_USER_SV48 - DEFAULT_MAP_WINDOW : \
+		 base)
+
 #else
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
 #define STACK_TOP_MAX		TASK_SIZE
+
+#define arch_get_mmap_end(addr, len, flags) \
+	((addr) > DEFAULT_MAP_WINDOW ? STACK_TOP_MAX : DEFAULT_MAP_WINDOW)
+
+#define arch_get_mmap_base(addr, base) \
+	((addr > DEFAULT_MAP_WINDOW) ? \
+		base + STACK_TOP_MAX - DEFAULT_MAP_WINDOW : \
+		base)
+
 #endif
 #define STACK_ALIGN		16
 
+
+#define STACK_TOP		DEFAULT_MAP_WINDOW
+
+/*
+ * This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE	PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
+
 #ifndef __ASSEMBLY__
 
 struct task_struct;