diff mbox series

[v3,10/29] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE

Message ID 20240403234054.2020347-11-debug@rivosinc.com (mailing list archive)
State RFC
Headers show
Series riscv control-flow integrity for usermode | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Deepak Gupta April 3, 2024, 11:34 p.m. UTC
`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
This is to make sure that any existing apps using PROT_WRITE still work.

Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
encodings for shadow stack. Above changes ensure that existing apps
continue to work because underneath kernel will be picking
`protection_map[VM_WRITE|VM_READ]` PTE encodings.

Signed-off-by: Deepak Gupta <debug@rivosinc.com>
---
 arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
 arch/riscv/include/asm/pgtable.h |  1 +
 arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
 arch/riscv/mm/init.c             |  2 +-
 mm/mmap.c                        |  1 +
 5 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/include/asm/mman.h

Comments

Charlie Jenkins May 10, 2024, 9:02 p.m. UTC | #1
On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> This is to make sure that any existing apps using PROT_WRITE still work.
> 
> Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> encodings for shadow stack. Above changes ensure that existing apps
> continue to work because underneath kernel will be picking
> `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> 
> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> ---
>  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
>  arch/riscv/include/asm/pgtable.h |  1 +
>  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
>  arch/riscv/mm/init.c             |  2 +-
>  mm/mmap.c                        |  1 +
>  5 files changed, 38 insertions(+), 1 deletion(-)
>  create mode 100644 arch/riscv/include/asm/mman.h
> 
> diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> new file mode 100644
> index 000000000000..ef9fedf32546
> --- /dev/null
> +++ b/arch/riscv/include/asm/mman.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_MMAN_H__
> +#define __ASM_MMAN_H__
> +
> +#include <linux/compiler.h>
> +#include <linux/types.h>
> +#include <uapi/asm/mman.h>
> +
> +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> +	unsigned long pkey __always_unused)
> +{
> +	unsigned long ret = 0;
> +
> +	/*
> +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> +	 * Only VM_WRITE means shadow stack.
> +	 */
> +	if (prot & PROT_WRITE)
> +		ret = (VM_READ | VM_WRITE);
> +	return ret;
> +}
> +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> +
> +#endif /* ! __ASM_MMAN_H__ */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 6066822e7396..4d5983bc6766 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
>  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
>  					 _PAGE_EXEC | _PAGE_WRITE)
> +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
>  
>  #define PAGE_COPY		PAGE_READ
>  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index f1c1416a9f1e..846c36b1b3d5 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -8,6 +8,8 @@
>  #include <linux/syscalls.h>
>  #include <asm/cacheflush.h>
>  #include <asm-generic/mman-common.h>
> +#include <vdso/vsyscall.h>
> +#include <asm/mman.h>
>  
>  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>  			   unsigned long prot, unsigned long flags,
> @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
>  		return -EINVAL;
>  
> +	/*
> +	 * If only PROT_WRITE is specified then extend that to PROT_READ
> +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
> +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
> +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
> +	 */
> +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))

The comments says that this should extend to PROT_READ if only
PROT_WRITE is specified. This condition instead is checking if
PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
This will not currently cause any issues because these both map to the
same value in the protection_map PAGE_COPY_EXEC, however this seems to
be not the intention of this change.

prot == PROT_WRITE better suits the condition explained in the comment.

> +		prot |= PROT_READ;
> +
>  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
>  			       offset >> (PAGE_SHIFT - page_shift_offset));
>  }
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index fa34cf55037b..98e5ece4052a 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>  static const pgprot_t protection_map[16] = {
>  	[VM_NONE]					= PAGE_NONE,
>  	[VM_READ]					= PAGE_READ,
> -	[VM_WRITE]					= PAGE_COPY,
> +	[VM_WRITE]					= PAGE_SHADOWSTACK,
>  	[VM_WRITE | VM_READ]				= PAGE_COPY,
>  	[VM_EXEC]					= PAGE_EXEC,
>  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d89770eaab6b..57a974f49b00 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -47,6 +47,7 @@
>  #include <linux/oom.h>
>  #include <linux/sched/mm.h>
>  #include <linux/ksm.h>
> +#include <linux/processor.h>

It doesn't seem like this is necessary for this patch.

- Charlie

>  
>  #include <linux/uaccess.h>
>  #include <asm/cacheflush.h>
> -- 
> 2.43.2
>
Alexandre Ghiti May 12, 2024, 4:24 p.m. UTC | #2
Hi Deepak,

On 04/04/2024 01:34, Deepak Gupta wrote:
> `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> This is to make sure that any existing apps using PROT_WRITE still work.
>
> Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> encodings for shadow stack. Above changes ensure that existing apps
> continue to work because underneath kernel will be picking
> `protection_map[VM_WRITE|VM_READ]` PTE encodings.
>
> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> ---
>   arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
>   arch/riscv/include/asm/pgtable.h |  1 +
>   arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
>   arch/riscv/mm/init.c             |  2 +-
>   mm/mmap.c                        |  1 +
>   5 files changed, 38 insertions(+), 1 deletion(-)
>   create mode 100644 arch/riscv/include/asm/mman.h
>
> diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> new file mode 100644
> index 000000000000..ef9fedf32546
> --- /dev/null
> +++ b/arch/riscv/include/asm/mman.h
> @@ -0,0 +1,24 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_MMAN_H__
> +#define __ASM_MMAN_H__
> +
> +#include <linux/compiler.h>
> +#include <linux/types.h>
> +#include <uapi/asm/mman.h>
> +
> +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> +	unsigned long pkey __always_unused)
> +{
> +	unsigned long ret = 0;
> +
> +	/*
> +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> +	 * Only VM_WRITE means shadow stack.
> +	 */
> +	if (prot & PROT_WRITE)
> +		ret = (VM_READ | VM_WRITE);
> +	return ret;
> +}
> +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> +
> +#endif /* ! __ASM_MMAN_H__ */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 6066822e7396..4d5983bc6766 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
>   #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>   #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
>   					 _PAGE_EXEC | _PAGE_WRITE)
> +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
>   
>   #define PAGE_COPY		PAGE_READ
>   #define PAGE_COPY_EXEC		PAGE_READ_EXEC
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index f1c1416a9f1e..846c36b1b3d5 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -8,6 +8,8 @@
>   #include <linux/syscalls.h>
>   #include <asm/cacheflush.h>
>   #include <asm-generic/mman-common.h>
> +#include <vdso/vsyscall.h>
> +#include <asm/mman.h>
>   
>   static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>   			   unsigned long prot, unsigned long flags,
> @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>   	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
>   		return -EINVAL;
>   
> +	/*
> +	 * If only PROT_WRITE is specified then extend that to PROT_READ
> +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
> +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
> +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
> +	 */
> +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
> +		prot |= PROT_READ;
> +
>   	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
>   			       offset >> (PAGE_SHIFT - page_shift_offset));
>   }
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index fa34cf55037b..98e5ece4052a 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>   static const pgprot_t protection_map[16] = {
>   	[VM_NONE]					= PAGE_NONE,
>   	[VM_READ]					= PAGE_READ,
> -	[VM_WRITE]					= PAGE_COPY,
> +	[VM_WRITE]					= PAGE_SHADOWSTACK,
>   	[VM_WRITE | VM_READ]				= PAGE_COPY,
>   	[VM_EXEC]					= PAGE_EXEC,
>   	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d89770eaab6b..57a974f49b00 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -47,6 +47,7 @@
>   #include <linux/oom.h>
>   #include <linux/sched/mm.h>
>   #include <linux/ksm.h>
> +#include <linux/processor.h>
>   
>   #include <linux/uaccess.h>
>   #include <asm/cacheflush.h>


What happens if someone restricts the permission to PROT_WRITE using 
mprotect()? I would say this is an issue since it would turn the pages 
into shadow stack pages.
Deepak Gupta May 13, 2024, 5:47 p.m. UTC | #3
On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
>On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
>> `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
>> VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
>> updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
>> This is to make sure that any existing apps using PROT_WRITE still work.
>>
>> Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
>> Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
>> encodings for shadow stack. Above changes ensure that existing apps
>> continue to work because underneath kernel will be picking
>> `protection_map[VM_WRITE|VM_READ]` PTE encodings.
>>
>> Signed-off-by: Deepak Gupta <debug@rivosinc.com>
>> ---
>>  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
>>  arch/riscv/include/asm/pgtable.h |  1 +
>>  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
>>  arch/riscv/mm/init.c             |  2 +-
>>  mm/mmap.c                        |  1 +
>>  5 files changed, 38 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/riscv/include/asm/mman.h
>>
>> diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
>> new file mode 100644
>> index 000000000000..ef9fedf32546
>> --- /dev/null
>> +++ b/arch/riscv/include/asm/mman.h
>> @@ -0,0 +1,24 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __ASM_MMAN_H__
>> +#define __ASM_MMAN_H__
>> +
>> +#include <linux/compiler.h>
>> +#include <linux/types.h>
>> +#include <uapi/asm/mman.h>
>> +
>> +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>> +	unsigned long pkey __always_unused)
>> +{
>> +	unsigned long ret = 0;
>> +
>> +	/*
>> +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
>> +	 * Only VM_WRITE means shadow stack.
>> +	 */
>> +	if (prot & PROT_WRITE)
>> +		ret = (VM_READ | VM_WRITE);
>> +	return ret;
>> +}
>> +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
>> +
>> +#endif /* ! __ASM_MMAN_H__ */
>> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
>> index 6066822e7396..4d5983bc6766 100644
>> --- a/arch/riscv/include/asm/pgtable.h
>> +++ b/arch/riscv/include/asm/pgtable.h
>> @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
>>  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>>  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
>>  					 _PAGE_EXEC | _PAGE_WRITE)
>> +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
>>
>>  #define PAGE_COPY		PAGE_READ
>>  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
>> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
>> index f1c1416a9f1e..846c36b1b3d5 100644
>> --- a/arch/riscv/kernel/sys_riscv.c
>> +++ b/arch/riscv/kernel/sys_riscv.c
>> @@ -8,6 +8,8 @@
>>  #include <linux/syscalls.h>
>>  #include <asm/cacheflush.h>
>>  #include <asm-generic/mman-common.h>
>> +#include <vdso/vsyscall.h>
>> +#include <asm/mman.h>
>>
>>  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>>  			   unsigned long prot, unsigned long flags,
>> @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>>  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
>>  		return -EINVAL;
>>
>> +	/*
>> +	 * If only PROT_WRITE is specified then extend that to PROT_READ
>> +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
>> +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
>> +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
>> +	 */
>> +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
>
>The comments says that this should extend to PROT_READ if only
>PROT_WRITE is specified. This condition instead is checking if
>PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
>VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
>This will not currently cause any issues because these both map to the
>same value in the protection_map PAGE_COPY_EXEC, however this seems to
>be not the intention of this change.
>
>prot == PROT_WRITE better suits the condition explained in the comment.

If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
of the way permissions are setup in `protection_map`. On risc-v there is no
way to have a page which is execute and write only. So expectation is that
if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
because internally it was translating to read, write and execute on page
permissions level. This patch make sure that, it stays same from page
permissions perspective.

If someone was using PROT_EXEC, it may translate to execute only and this change
doesn't impact that.

Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in
protection flags and if that condition is satisfied, it assumes that caller assumed
page is going to be read allowed as well.


>
>> +		prot |= PROT_READ;
>> +
>>  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
>>  			       offset >> (PAGE_SHIFT - page_shift_offset));
>>  }
>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> index fa34cf55037b..98e5ece4052a 100644
>> --- a/arch/riscv/mm/init.c
>> +++ b/arch/riscv/mm/init.c
>> @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>>  static const pgprot_t protection_map[16] = {
>>  	[VM_NONE]					= PAGE_NONE,
>>  	[VM_READ]					= PAGE_READ,
>> -	[VM_WRITE]					= PAGE_COPY,
>> +	[VM_WRITE]					= PAGE_SHADOWSTACK,
>>  	[VM_WRITE | VM_READ]				= PAGE_COPY,
>>  	[VM_EXEC]					= PAGE_EXEC,
>>  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index d89770eaab6b..57a974f49b00 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -47,6 +47,7 @@
>>  #include <linux/oom.h>
>>  #include <linux/sched/mm.h>
>>  #include <linux/ksm.h>
>> +#include <linux/processor.h>
>
>It doesn't seem like this is necessary for this patch.

Thanks. Yeah it looks like I forgot to remove this over the churn.
Will fix it.

>
>- Charlie
>
>>
>>  #include <linux/uaccess.h>
>>  #include <asm/cacheflush.h>
>> --
>> 2.43.2
>>
Deepak Gupta May 13, 2024, 6:29 p.m. UTC | #4
On Sun, May 12, 2024 at 06:24:45PM +0200, Alexandre Ghiti wrote:
>Hi Deepak,
>
>On 04/04/2024 01:34, Deepak Gupta wrote:
>>`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
>>VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
>>updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
>>This is to make sure that any existing apps using PROT_WRITE still work.
>>
>>Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
>>Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
>>encodings for shadow stack. Above changes ensure that existing apps
>>continue to work because underneath kernel will be picking
>>`protection_map[VM_WRITE|VM_READ]` PTE encodings.
>>
>>Signed-off-by: Deepak Gupta <debug@rivosinc.com>
>>---
>>  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
>>  arch/riscv/include/asm/pgtable.h |  1 +
>>  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
>>  arch/riscv/mm/init.c             |  2 +-
>>  mm/mmap.c                        |  1 +
>>  5 files changed, 38 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/riscv/include/asm/mman.h
>>
>>diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
>>new file mode 100644
>>index 000000000000..ef9fedf32546
>>--- /dev/null
>>+++ b/arch/riscv/include/asm/mman.h
>>@@ -0,0 +1,24 @@
>>+/* SPDX-License-Identifier: GPL-2.0 */
>>+#ifndef __ASM_MMAN_H__
>>+#define __ASM_MMAN_H__
>>+
>>+#include <linux/compiler.h>
>>+#include <linux/types.h>
>>+#include <uapi/asm/mman.h>
>>+
>>+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>>+	unsigned long pkey __always_unused)
>>+{
>>+	unsigned long ret = 0;
>>+
>>+	/*
>>+	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
>>+	 * Only VM_WRITE means shadow stack.
>>+	 */
>>+	if (prot & PROT_WRITE)
>>+		ret = (VM_READ | VM_WRITE);
>>+	return ret;
>>+}
>>+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
>>+
>>+#endif /* ! __ASM_MMAN_H__ */
>>diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
>>index 6066822e7396..4d5983bc6766 100644
>>--- a/arch/riscv/include/asm/pgtable.h
>>+++ b/arch/riscv/include/asm/pgtable.h
>>@@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
>>  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>>  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
>>  					 _PAGE_EXEC | _PAGE_WRITE)
>>+#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
>>  #define PAGE_COPY		PAGE_READ
>>  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
>>diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
>>index f1c1416a9f1e..846c36b1b3d5 100644
>>--- a/arch/riscv/kernel/sys_riscv.c
>>+++ b/arch/riscv/kernel/sys_riscv.c
>>@@ -8,6 +8,8 @@
>>  #include <linux/syscalls.h>
>>  #include <asm/cacheflush.h>
>>  #include <asm-generic/mman-common.h>
>>+#include <vdso/vsyscall.h>
>>+#include <asm/mman.h>
>>  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>>  			   unsigned long prot, unsigned long flags,
>>@@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>>  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
>>  		return -EINVAL;
>>+	/*
>>+	 * If only PROT_WRITE is specified then extend that to PROT_READ
>>+	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
>>+	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
>>+	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
>>+	 */
>>+	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
>>+		prot |= PROT_READ;
>>+
>>  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
>>  			       offset >> (PAGE_SHIFT - page_shift_offset));
>>  }
>>diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>>index fa34cf55037b..98e5ece4052a 100644
>>--- a/arch/riscv/mm/init.c
>>+++ b/arch/riscv/mm/init.c
>>@@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>>  static const pgprot_t protection_map[16] = {
>>  	[VM_NONE]					= PAGE_NONE,
>>  	[VM_READ]					= PAGE_READ,
>>-	[VM_WRITE]					= PAGE_COPY,
>>+	[VM_WRITE]					= PAGE_SHADOWSTACK,
>>  	[VM_WRITE | VM_READ]				= PAGE_COPY,
>>  	[VM_EXEC]					= PAGE_EXEC,
>>  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
>>diff --git a/mm/mmap.c b/mm/mmap.c
>>index d89770eaab6b..57a974f49b00 100644
>>--- a/mm/mmap.c
>>+++ b/mm/mmap.c
>>@@ -47,6 +47,7 @@
>>  #include <linux/oom.h>
>>  #include <linux/sched/mm.h>
>>  #include <linux/ksm.h>
>>+#include <linux/processor.h>
>>  #include <linux/uaccess.h>
>>  #include <asm/cacheflush.h>
>
>
>What happens if someone restricts the permission to PROT_WRITE using 
>mprotect()? I would say this is an issue since it would turn the pages 
>into shadow stack pages.

look at this patch in this patch series.
"riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE"

It implements `arch_calc_vm_prot_bits` for risc-v and enforces that incoming
PROT_WRITE is converted to VM_READ | VM_WRITE. And thus it'll become read/write
memory. This way `mprotect` can be used to convert a shadow stack page to
read/write memory but not a regular memory to shadow stack page.

>
>
Charlie Jenkins May 13, 2024, 6:36 p.m. UTC | #5
On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:
> On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
> > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> > > This is to make sure that any existing apps using PROT_WRITE still work.
> > > 
> > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> > > encodings for shadow stack. Above changes ensure that existing apps
> > > continue to work because underneath kernel will be picking
> > > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> > > 
> > > Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> > > ---
> > >  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
> > >  arch/riscv/include/asm/pgtable.h |  1 +
> > >  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
> > >  arch/riscv/mm/init.c             |  2 +-
> > >  mm/mmap.c                        |  1 +
> > >  5 files changed, 38 insertions(+), 1 deletion(-)
> > >  create mode 100644 arch/riscv/include/asm/mman.h
> > > 
> > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> > > new file mode 100644
> > > index 000000000000..ef9fedf32546
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/mman.h
> > > @@ -0,0 +1,24 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef __ASM_MMAN_H__
> > > +#define __ASM_MMAN_H__
> > > +
> > > +#include <linux/compiler.h>
> > > +#include <linux/types.h>
> > > +#include <uapi/asm/mman.h>
> > > +
> > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> > > +	unsigned long pkey __always_unused)
> > > +{
> > > +	unsigned long ret = 0;
> > > +
> > > +	/*
> > > +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> > > +	 * Only VM_WRITE means shadow stack.
> > > +	 */
> > > +	if (prot & PROT_WRITE)
> > > +		ret = (VM_READ | VM_WRITE);
> > > +	return ret;
> > > +}
> > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> > > +
> > > +#endif /* ! __ASM_MMAN_H__ */
> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > index 6066822e7396..4d5983bc6766 100644
> > > --- a/arch/riscv/include/asm/pgtable.h
> > > +++ b/arch/riscv/include/asm/pgtable.h
> > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
> > >  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
> > >  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
> > >  					 _PAGE_EXEC | _PAGE_WRITE)
> > > +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
> > > 
> > >  #define PAGE_COPY		PAGE_READ
> > >  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
> > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> > > index f1c1416a9f1e..846c36b1b3d5 100644
> > > --- a/arch/riscv/kernel/sys_riscv.c
> > > +++ b/arch/riscv/kernel/sys_riscv.c
> > > @@ -8,6 +8,8 @@
> > >  #include <linux/syscalls.h>
> > >  #include <asm/cacheflush.h>
> > >  #include <asm-generic/mman-common.h>
> > > +#include <vdso/vsyscall.h>
> > > +#include <asm/mman.h>
> > > 
> > >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > >  			   unsigned long prot, unsigned long flags,
> > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > >  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
> > >  		return -EINVAL;
> > > 
> > > +	/*
> > > +	 * If only PROT_WRITE is specified then extend that to PROT_READ
> > > +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
> > > +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
> > > +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
> > > +	 */
> > > +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
> > 
> > The comments says that this should extend to PROT_READ if only
> > PROT_WRITE is specified. This condition instead is checking if
> > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
> > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
> > This will not currently cause any issues because these both map to the
> > same value in the protection_map PAGE_COPY_EXEC, however this seems to
> > be not the intention of this change.
> > 
> > prot == PROT_WRITE better suits the condition explained in the comment.
> 
> If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
> of the way permissions are setup in `protection_map`. On risc-v there is no
> way to have a page which is execute and write only. So expectation is that
> if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
> because internally it was translating to read, write and execute on page
> permissions level. This patch make sure that, it stays same from page
> permissions perspective.
> 
> If someone was using PROT_EXEC, it may translate to execute only and this change
> doesn't impact that.
> 
> Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in
> protection flags and if that condition is satisfied, it assumes that caller assumed
> page is going to be read allowed as well.

The purpose of this change is for compatibility with shadow stack pages
but this affects flags for pages that are not shadow stack pages.
Adding PROT_READ to the other cases is redundant as protection_map
already handles that mapping. Permissions being strictly PROT_WRITE is
the only case that needs to be handled, and is the only case that is
called out in the commit message and in the comment.

- Charlie

> 
> 
> > 
> > > +		prot |= PROT_READ;
> > > +
> > >  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
> > >  			       offset >> (PAGE_SHIFT - page_shift_offset));
> > >  }
> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > index fa34cf55037b..98e5ece4052a 100644
> > > --- a/arch/riscv/mm/init.c
> > > +++ b/arch/riscv/mm/init.c
> > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
> > >  static const pgprot_t protection_map[16] = {
> > >  	[VM_NONE]					= PAGE_NONE,
> > >  	[VM_READ]					= PAGE_READ,
> > > -	[VM_WRITE]					= PAGE_COPY,
> > > +	[VM_WRITE]					= PAGE_SHADOWSTACK,
> > >  	[VM_WRITE | VM_READ]				= PAGE_COPY,
> > >  	[VM_EXEC]					= PAGE_EXEC,
> > >  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
> > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > index d89770eaab6b..57a974f49b00 100644
> > > --- a/mm/mmap.c
> > > +++ b/mm/mmap.c
> > > @@ -47,6 +47,7 @@
> > >  #include <linux/oom.h>
> > >  #include <linux/sched/mm.h>
> > >  #include <linux/ksm.h>
> > > +#include <linux/processor.h>
> > 
> > It doesn't seem like this is necessary for this patch.
> 
> Thanks. Yeah it looks like I forgot to remove this over the churn.
> Will fix it.
> 
> > 
> > - Charlie
> > 
> > > 
> > >  #include <linux/uaccess.h>
> > >  #include <asm/cacheflush.h>
> > > --
> > > 2.43.2
> > >
Deepak Gupta May 13, 2024, 6:41 p.m. UTC | #6
On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote:
>On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:
>> On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
>> > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
>> > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
>> > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
>> > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
>> > > This is to make sure that any existing apps using PROT_WRITE still work.
>> > >
>> > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
>> > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
>> > > encodings for shadow stack. Above changes ensure that existing apps
>> > > continue to work because underneath kernel will be picking
>> > > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
>> > >
>> > > Signed-off-by: Deepak Gupta <debug@rivosinc.com>
>> > > ---
>> > >  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
>> > >  arch/riscv/include/asm/pgtable.h |  1 +
>> > >  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
>> > >  arch/riscv/mm/init.c             |  2 +-
>> > >  mm/mmap.c                        |  1 +
>> > >  5 files changed, 38 insertions(+), 1 deletion(-)
>> > >  create mode 100644 arch/riscv/include/asm/mman.h
>> > >
>> > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
>> > > new file mode 100644
>> > > index 000000000000..ef9fedf32546
>> > > --- /dev/null
>> > > +++ b/arch/riscv/include/asm/mman.h
>> > > @@ -0,0 +1,24 @@
>> > > +/* SPDX-License-Identifier: GPL-2.0 */
>> > > +#ifndef __ASM_MMAN_H__
>> > > +#define __ASM_MMAN_H__
>> > > +
>> > > +#include <linux/compiler.h>
>> > > +#include <linux/types.h>
>> > > +#include <uapi/asm/mman.h>
>> > > +
>> > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>> > > +	unsigned long pkey __always_unused)
>> > > +{
>> > > +	unsigned long ret = 0;
>> > > +
>> > > +	/*
>> > > +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
>> > > +	 * Only VM_WRITE means shadow stack.
>> > > +	 */
>> > > +	if (prot & PROT_WRITE)
>> > > +		ret = (VM_READ | VM_WRITE);
>> > > +	return ret;
>> > > +}
>> > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
>> > > +
>> > > +#endif /* ! __ASM_MMAN_H__ */
>> > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
>> > > index 6066822e7396..4d5983bc6766 100644
>> > > --- a/arch/riscv/include/asm/pgtable.h
>> > > +++ b/arch/riscv/include/asm/pgtable.h
>> > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
>> > >  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
>> > >  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
>> > >  					 _PAGE_EXEC | _PAGE_WRITE)
>> > > +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
>> > >
>> > >  #define PAGE_COPY		PAGE_READ
>> > >  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
>> > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
>> > > index f1c1416a9f1e..846c36b1b3d5 100644
>> > > --- a/arch/riscv/kernel/sys_riscv.c
>> > > +++ b/arch/riscv/kernel/sys_riscv.c
>> > > @@ -8,6 +8,8 @@
>> > >  #include <linux/syscalls.h>
>> > >  #include <asm/cacheflush.h>
>> > >  #include <asm-generic/mman-common.h>
>> > > +#include <vdso/vsyscall.h>
>> > > +#include <asm/mman.h>
>> > >
>> > >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>> > >  			   unsigned long prot, unsigned long flags,
>> > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
>> > >  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
>> > >  		return -EINVAL;
>> > >
>> > > +	/*
>> > > +	 * If only PROT_WRITE is specified then extend that to PROT_READ
>> > > +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
>> > > +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
>> > > +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
>> > > +	 */
>> > > +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
>> >
>> > The comments says that this should extend to PROT_READ if only
>> > PROT_WRITE is specified. This condition instead is checking if
>> > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
>> > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
>> > This will not currently cause any issues because these both map to the
>> > same value in the protection_map PAGE_COPY_EXEC, however this seems to
>> > be not the intention of this change.
>> >
>> > prot == PROT_WRITE better suits the condition explained in the comment.
>>
>> If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
>> of the way permissions are setup in `protection_map`. On risc-v there is no
>> way to have a page which is execute and write only. So expectation is that
>> if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
>> because internally it was translating to read, write and execute on page
>> permissions level. This patch make sure that, it stays same from page
>> permissions perspective.
>>
>> If someone was using PROT_EXEC, it may translate to execute only and this change
>> doesn't impact that.
>>
>> Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in
>> protection flags and if that condition is satisfied, it assumes that caller assumed
>> page is going to be read allowed as well.
>
>The purpose of this change is for compatibility with shadow stack pages
>but this affects flags for pages that are not shadow stack pages.
>Adding PROT_READ to the other cases is redundant as protection_map
>already handles that mapping. Permissions being strictly PROT_WRITE is
>the only case that needs to be handled, and is the only case that is
>called out in the commit message and in the comment.

Yeah that's fine.
I can change the commit message or just strictly check for PROT_WRITE.
It doesn't change bottomline, I am fine with either option.

Let me know your preference.

>
>- Charlie
>
>>
>>
>> >
>> > > +		prot |= PROT_READ;
>> > > +
>> > >  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
>> > >  			       offset >> (PAGE_SHIFT - page_shift_offset));
>> > >  }
>> > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> > > index fa34cf55037b..98e5ece4052a 100644
>> > > --- a/arch/riscv/mm/init.c
>> > > +++ b/arch/riscv/mm/init.c
>> > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
>> > >  static const pgprot_t protection_map[16] = {
>> > >  	[VM_NONE]					= PAGE_NONE,
>> > >  	[VM_READ]					= PAGE_READ,
>> > > -	[VM_WRITE]					= PAGE_COPY,
>> > > +	[VM_WRITE]					= PAGE_SHADOWSTACK,
>> > >  	[VM_WRITE | VM_READ]				= PAGE_COPY,
>> > >  	[VM_EXEC]					= PAGE_EXEC,
>> > >  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
>> > > diff --git a/mm/mmap.c b/mm/mmap.c
>> > > index d89770eaab6b..57a974f49b00 100644
>> > > --- a/mm/mmap.c
>> > > +++ b/mm/mmap.c
>> > > @@ -47,6 +47,7 @@
>> > >  #include <linux/oom.h>
>> > >  #include <linux/sched/mm.h>
>> > >  #include <linux/ksm.h>
>> > > +#include <linux/processor.h>
>> >
>> > It doesn't seem like this is necessary for this patch.
>>
>> Thanks. Yeah it looks like I forgot to remove this over the churn.
>> Will fix it.
>>
>> >
>> > - Charlie
>> >
>> > >
>> > >  #include <linux/uaccess.h>
>> > >  #include <asm/cacheflush.h>
>> > > --
>> > > 2.43.2
>> > >
Charlie Jenkins May 13, 2024, 9:26 p.m. UTC | #7
On Mon, May 13, 2024 at 11:41:34AM -0700, Deepak Gupta wrote:
> On Mon, May 13, 2024 at 11:36:49AM -0700, Charlie Jenkins wrote:
> > On Mon, May 13, 2024 at 10:47:25AM -0700, Deepak Gupta wrote:
> > > On Fri, May 10, 2024 at 02:02:54PM -0700, Charlie Jenkins wrote:
> > > > On Wed, Apr 03, 2024 at 04:34:58PM -0700, Deepak Gupta wrote:
> > > > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ |
> > > > > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is
> > > > > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ).
> > > > > This is to make sure that any existing apps using PROT_WRITE still work.
> > > > >
> > > > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings.
> > > > > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE
> > > > > encodings for shadow stack. Above changes ensure that existing apps
> > > > > continue to work because underneath kernel will be picking
> > > > > `protection_map[VM_WRITE|VM_READ]` PTE encodings.
> > > > >
> > > > > Signed-off-by: Deepak Gupta <debug@rivosinc.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/mman.h    | 24 ++++++++++++++++++++++++
> > > > >  arch/riscv/include/asm/pgtable.h |  1 +
> > > > >  arch/riscv/kernel/sys_riscv.c    | 11 +++++++++++
> > > > >  arch/riscv/mm/init.c             |  2 +-
> > > > >  mm/mmap.c                        |  1 +
> > > > >  5 files changed, 38 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 arch/riscv/include/asm/mman.h
> > > > >
> > > > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
> > > > > new file mode 100644
> > > > > index 000000000000..ef9fedf32546
> > > > > --- /dev/null
> > > > > +++ b/arch/riscv/include/asm/mman.h
> > > > > @@ -0,0 +1,24 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +#ifndef __ASM_MMAN_H__
> > > > > +#define __ASM_MMAN_H__
> > > > > +
> > > > > +#include <linux/compiler.h>
> > > > > +#include <linux/types.h>
> > > > > +#include <uapi/asm/mman.h>
> > > > > +
> > > > > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> > > > > +	unsigned long pkey __always_unused)
> > > > > +{
> > > > > +	unsigned long ret = 0;
> > > > > +
> > > > > +	/*
> > > > > +	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
> > > > > +	 * Only VM_WRITE means shadow stack.
> > > > > +	 */
> > > > > +	if (prot & PROT_WRITE)
> > > > > +		ret = (VM_READ | VM_WRITE);
> > > > > +	return ret;
> > > > > +}
> > > > > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> > > > > +
> > > > > +#endif /* ! __ASM_MMAN_H__ */
> > > > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> > > > > index 6066822e7396..4d5983bc6766 100644
> > > > > --- a/arch/riscv/include/asm/pgtable.h
> > > > > +++ b/arch/riscv/include/asm/pgtable.h
> > > > > @@ -184,6 +184,7 @@ extern struct pt_alloc_ops pt_ops __initdata;
> > > > >  #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
> > > > >  #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
> > > > >  					 _PAGE_EXEC | _PAGE_WRITE)
> > > > > +#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
> > > > >
> > > > >  #define PAGE_COPY		PAGE_READ
> > > > >  #define PAGE_COPY_EXEC		PAGE_READ_EXEC
> > > > > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> > > > > index f1c1416a9f1e..846c36b1b3d5 100644
> > > > > --- a/arch/riscv/kernel/sys_riscv.c
> > > > > +++ b/arch/riscv/kernel/sys_riscv.c
> > > > > @@ -8,6 +8,8 @@
> > > > >  #include <linux/syscalls.h>
> > > > >  #include <asm/cacheflush.h>
> > > > >  #include <asm-generic/mman-common.h>
> > > > > +#include <vdso/vsyscall.h>
> > > > > +#include <asm/mman.h>
> > > > >
> > > > >  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > > > >  			   unsigned long prot, unsigned long flags,
> > > > > @@ -17,6 +19,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len,
> > > > >  	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
> > > > >  		return -EINVAL;
> > > > >
> > > > > +	/*
> > > > > +	 * If only PROT_WRITE is specified then extend that to PROT_READ
> > > > > +	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
> > > > > +	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
> > > > > +	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
> > > > > +	 */
> > > > > +	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
> > > >
> > > > The comments says that this should extend to PROT_READ if only
> > > > PROT_WRITE is specified. This condition instead is checking if
> > > > PROT_WRITE is selected but PROT_READ is not. If prot is (VM_EXEC |
> > > > VM_WRITE) then it would be extended to (VM_EXEC | VM_WRITE | VM_READ).
> > > > This will not currently cause any issues because these both map to the
> > > > same value in the protection_map PAGE_COPY_EXEC, however this seems to
> > > > be not the intention of this change.
> > > >
> > > > prot == PROT_WRITE better suits the condition explained in the comment.
> > > 
> > > If someone specifies this (PROT_EXEC | PROT_WRITE) today, it works because
> > > of the way permissions are setup in `protection_map`. On risc-v there is no
> > > way to have a page which is execute and write only. So expectation is that
> > > if some apps were using `PROT_EXEC | PROT_WRITE` today, they were working
> > > because internally it was translating to read, write and execute on page
> > > permissions level. This patch make sure that, it stays same from page
> > > permissions perspective.
> > > 
> > > If someone was using PROT_EXEC, it may translate to execute only and this change
> > > doesn't impact that.
> > > 
> > > Patch simply looks for presence of `PROT_WRITE` and absence of `PROT_READ` in
> > > protection flags and if that condition is satisfied, it assumes that caller assumed
> > > page is going to be read allowed as well.
> > 
> > The purpose of this change is for compatibility with shadow stack pages
> > but this affects flags for pages that are not shadow stack pages.
> > Adding PROT_READ to the other cases is redundant as protection_map
> > already handles that mapping. Permissions being strictly PROT_WRITE is
> > the only case that needs to be handled, and is the only case that is
> > called out in the commit message and in the comment.
> 
> Yeah that's fine.
> I can change the commit message or just strictly check for PROT_WRITE.
> It doesn't change bottomline, I am fine with either option.
> 
> Let me know your preference.

I would prefer the strict check. This is not critical though so I will
support whatever you decide!

- Charlie

> 
> > 
> > - Charlie
> > 
> > > 
> > > 
> > > >
> > > > > +		prot |= PROT_READ;
> > > > > +
> > > > >  	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
> > > > >  			       offset >> (PAGE_SHIFT - page_shift_offset));
> > > > >  }
> > > > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > > > > index fa34cf55037b..98e5ece4052a 100644
> > > > > --- a/arch/riscv/mm/init.c
> > > > > +++ b/arch/riscv/mm/init.c
> > > > > @@ -299,7 +299,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
> > > > >  static const pgprot_t protection_map[16] = {
> > > > >  	[VM_NONE]					= PAGE_NONE,
> > > > >  	[VM_READ]					= PAGE_READ,
> > > > > -	[VM_WRITE]					= PAGE_COPY,
> > > > > +	[VM_WRITE]					= PAGE_SHADOWSTACK,
> > > > >  	[VM_WRITE | VM_READ]				= PAGE_COPY,
> > > > >  	[VM_EXEC]					= PAGE_EXEC,
> > > > >  	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
> > > > > diff --git a/mm/mmap.c b/mm/mmap.c
> > > > > index d89770eaab6b..57a974f49b00 100644
> > > > > --- a/mm/mmap.c
> > > > > +++ b/mm/mmap.c
> > > > > @@ -47,6 +47,7 @@
> > > > >  #include <linux/oom.h>
> > > > >  #include <linux/sched/mm.h>
> > > > >  #include <linux/ksm.h>
> > > > > +#include <linux/processor.h>
> > > >
> > > > It doesn't seem like this is necessary for this patch.
> > > 
> > > Thanks. Yeah it looks like I forgot to remove this over the churn.
> > > Will fix it.
> > > 
> > > >
> > > > - Charlie
> > > >
> > > > >
> > > > >  #include <linux/uaccess.h>
> > > > >  #include <asm/cacheflush.h>
> > > > > --
> > > > > 2.43.2
> > > > >
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h
new file mode 100644
index 000000000000..ef9fedf32546
--- /dev/null
+++ b/arch/riscv/include/asm/mman.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMAN_H__
+#define __ASM_MMAN_H__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <uapi/asm/mman.h>
+
+static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
+	unsigned long pkey __always_unused)
+{
+	unsigned long ret = 0;
+
+	/*
+	 * If PROT_WRITE was specified, force it to VM_READ | VM_WRITE.
+	 * Only VM_WRITE means shadow stack.
+	 */
+	if (prot & PROT_WRITE)
+		ret = (VM_READ | VM_WRITE);
+	return ret;
+}
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
+
+#endif /* ! __ASM_MMAN_H__ */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 6066822e7396..4d5983bc6766 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -184,6 +184,7 @@  extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_READ_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 #define PAGE_WRITE_EXEC		__pgprot(_PAGE_BASE | _PAGE_READ |	\
 					 _PAGE_EXEC | _PAGE_WRITE)
+#define PAGE_SHADOWSTACK       __pgprot(_PAGE_BASE | _PAGE_WRITE)
 
 #define PAGE_COPY		PAGE_READ
 #define PAGE_COPY_EXEC		PAGE_READ_EXEC
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index f1c1416a9f1e..846c36b1b3d5 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -8,6 +8,8 @@ 
 #include <linux/syscalls.h>
 #include <asm/cacheflush.h>
 #include <asm-generic/mman-common.h>
+#include <vdso/vsyscall.h>
+#include <asm/mman.h>
 
 static long riscv_sys_mmap(unsigned long addr, unsigned long len,
 			   unsigned long prot, unsigned long flags,
@@ -17,6 +19,15 @@  static long riscv_sys_mmap(unsigned long addr, unsigned long len,
 	if (unlikely(offset & (~PAGE_MASK >> page_shift_offset)))
 		return -EINVAL;
 
+	/*
+	 * If only PROT_WRITE is specified then extend that to PROT_READ
+	 * protection_map[VM_WRITE] is now going to select shadow stack encodings.
+	 * So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ]
+	 * If user wants to create shadow stack then they should use `map_shadow_stack` syscall.
+	 */
+	if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ)))
+		prot |= PROT_READ;
+
 	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
 			       offset >> (PAGE_SHIFT - page_shift_offset));
 }
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fa34cf55037b..98e5ece4052a 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -299,7 +299,7 @@  pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
 static const pgprot_t protection_map[16] = {
 	[VM_NONE]					= PAGE_NONE,
 	[VM_READ]					= PAGE_READ,
-	[VM_WRITE]					= PAGE_COPY,
+	[VM_WRITE]					= PAGE_SHADOWSTACK,
 	[VM_WRITE | VM_READ]				= PAGE_COPY,
 	[VM_EXEC]					= PAGE_EXEC,
 	[VM_EXEC | VM_READ]				= PAGE_READ_EXEC,
diff --git a/mm/mmap.c b/mm/mmap.c
index d89770eaab6b..57a974f49b00 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -47,6 +47,7 @@ 
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
 #include <linux/ksm.h>
+#include <linux/processor.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>