diff mbox series

riscv: vdso: map data page before vDSO code

Message ID 20210829094708.169037-1-cerg2010cerg2010@mail.ru (mailing list archive)
State New, archived
Headers show
Series riscv: vdso: map data page before vDSO code | expand

Commit Message

Sergey Larin Aug. 29, 2021, 9:47 a.m. UTC
The current vDSO implementation assumes that the code always fits in a
single page, and that the data page follows it:

	PROVIDE(_vdso_data = . + PAGE_SIZE);

However, this was not the case with my kernel build - the
shared object had a size of 4800 bytes. This is more than
4096 and requires a second page for the rest of the shared object.

As a result, the CLOCK_REALTIME_COARSE clock was broken: it always returned 0
because the vDSO code was reading the second code page instead of the
data page. Glibc uses this clock for the time() function.

So instead of computing the offset of the data page (which would have to be
done at run time - the size of the binary is not known while it is being
built), simply map the data page before the code, as ARM does:

	PROVIDE(_vdso_data = . - PAGE_SIZE);

This commit also fixes arch_vma_name() for the data page - in my case it
was reporting the same '[vdso]' name for it.

Since I don't have real hardware, the change was debugged with KGDB
in RVVM and also verified in QEMU.

Signed-off-by: Sergey Larin <cerg2010cerg2010@mail.ru>
---
 arch/riscv/kernel/vdso.c          | 22 +++++++++++-----------
 arch/riscv/kernel/vdso/vdso.lds.S |  2 +-
 2 files changed, 12 insertions(+), 12 deletions(-)

Comments

Kefeng Wang Aug. 29, 2021, 9:59 a.m. UTC | #1
Hi Sergey

There is already one fix,

https://patchwork.kernel.org/project/linux-riscv/list/?series=534877

On 2021/8/29 17:47, Sergey Larin wrote:
> Current vDSO implementation assumes that the code size always fits in
> single page, and the data page follows it:
>
> 	PROVIDE(_vdso_data = . + PAGE_SIZE);
>
> However, this was not the case with my kernel build - the
> shared object had the size of 4800 bytes. This, obviously, is more than
> 4096 and requires second page for the rest of the data.
>
> CLOCK_REALTIME_COARSE clock became broken. It was always returning 0
> because vDSO code was reading the second code page, not the
> data page. Glibc uses this clock for the time() function.
>
> So instead of computing the offset for the data page (it is necessary to
> do in runtime - you can't know the size of the binary while you're
> building it) simply move it behind the code like the ARM does:
>
> 	PROVIDE(_vdso_data = . - PAGE_SIZE);
>
> This commit also fixes arch_vma_name for the data page - it was
> reporting the same '[vdso]' name for it in my case.
>
> Since I don't have the real hardware, the change was debugged with KGDB
> in RVVM and also verified in QEMU.
>
> Signed-off-by: Sergey Larin <cerg2010cerg2010@mail.ru>
> ---
>   arch/riscv/kernel/vdso.c          | 22 +++++++++++-----------
>   arch/riscv/kernel/vdso/vdso.lds.S |  2 +-
>   2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> index 25a3b8849599..0c49390e9be3 100644
> --- a/arch/riscv/kernel/vdso.c
> +++ b/arch/riscv/kernel/vdso.c
> @@ -44,13 +44,13 @@ static int __init vdso_init(void)
>   		return -ENOMEM;
>   	}
>   
> +	vdso_pagelist[0] = virt_to_page(vdso_data);
>   	for (i = 0; i < vdso_pages; i++) {
>   		struct page *pg;
>   
>   		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> -		vdso_pagelist[i] = pg;
> +		vdso_pagelist[i + 1] = pg;
>   	}
> -	vdso_pagelist[i] = virt_to_page(vdso_data);
>   
>   	return 0;
>   }
> @@ -77,21 +77,21 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
>   	 * install_special_mapping or the perf counter mmap tracking code
>   	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
>   	 */
> -	mm->context.vdso = (void *)vdso_base;
> +	mm->context.vdso = (void *)vdso_base + PAGE_SIZE;
>   
> -	ret =
> -	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
> -		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> -		vdso_pagelist);
> +	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
> +		(VM_READ | VM_MAYREAD), &vdso_pagelist[0]);
>   
>   	if (unlikely(ret)) {
>   		mm->context.vdso = NULL;
>   		goto end;
>   	}
>   
> -	vdso_base += (vdso_pages << PAGE_SHIFT);
> -	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
> -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> +	vdso_base += PAGE_SIZE;
> +	ret =
> +	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
> +		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> +		&vdso_pagelist[1]);
>   
>   	if (unlikely(ret))
>   		mm->context.vdso = NULL;
> @@ -105,7 +105,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
>   	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
>   		return "[vdso]";
>   	if (vma->vm_mm && (vma->vm_start ==
> -			   (long)vma->vm_mm->context.vdso + PAGE_SIZE))
> +			   (long)vma->vm_mm->context.vdso - PAGE_SIZE))
>   		return "[vdso_data]";
>   	return NULL;
>   }
> diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> index e6f558bca71b..fd8a31075256 100644
> --- a/arch/riscv/kernel/vdso/vdso.lds.S
> +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> @@ -8,7 +8,7 @@ OUTPUT_ARCH(riscv)
>   
>   SECTIONS
>   {
> -	PROVIDE(_vdso_data = . + PAGE_SIZE);
> +	PROVIDE(_vdso_data = . - PAGE_SIZE);
>   	. = SIZEOF_HEADERS;
>   
>   	.hash		: { *(.hash) }			:text
Sergey Larin Aug. 29, 2021, 10:16 a.m. UTC | #2
On Sun, Aug 29, 2021 at 05:59:04PM +0800, Kefeng Wang wrote:
> Hi Sergey
> 
> There is already one fix,
> 
> https://patchwork.kernel.org/project/linux-riscv/list/?series=534877
> 

Oh, I missed it. That one looks cleaner. Thanks anyway!

> On 2021/8/29 17:47, Sergey Larin wrote:
> > Current vDSO implementation assumes that the code size always fits in
> > single page, and the data page follows it:
> > 
> > 	PROVIDE(_vdso_data = . + PAGE_SIZE);
> > 
> > However, this was not the case with my kernel build - the
> > shared object had the size of 4800 bytes. This, obviously, is more than
> > 4096 and requires second page for the rest of the data.
> > 
> > CLOCK_REALTIME_COARSE clock became broken. It was always returning 0
> > because vDSO code was reading the second code page, not the
> > data page. Glibc uses this clock for the time() function.
> > 
> > So instead of computing the offset for the data page (it is necessary to
> > do in runtime - you can't know the size of the binary while you're
> > building it) simply move it behind the code like the ARM does:
> > 
> > 	PROVIDE(_vdso_data = . - PAGE_SIZE);
> > 
> > This commit also fixes arch_vma_name for the data page - it was
> > reporting the same '[vdso]' name for it in my case.
> > 
> > Since I don't have the real hardware, the change was debugged with KGDB
> > in RVVM and also verified in QEMU.
> > 
> > Signed-off-by: Sergey Larin <cerg2010cerg2010@mail.ru>
> > ---
> >   arch/riscv/kernel/vdso.c          | 22 +++++++++++-----------
> >   arch/riscv/kernel/vdso/vdso.lds.S |  2 +-
> >   2 files changed, 12 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
> > index 25a3b8849599..0c49390e9be3 100644
> > --- a/arch/riscv/kernel/vdso.c
> > +++ b/arch/riscv/kernel/vdso.c
> > @@ -44,13 +44,13 @@ static int __init vdso_init(void)
> >   		return -ENOMEM;
> >   	}
> > +	vdso_pagelist[0] = virt_to_page(vdso_data);
> >   	for (i = 0; i < vdso_pages; i++) {
> >   		struct page *pg;
> >   		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
> > -		vdso_pagelist[i] = pg;
> > +		vdso_pagelist[i + 1] = pg;
> >   	}
> > -	vdso_pagelist[i] = virt_to_page(vdso_data);
> >   	return 0;
> >   }
> > @@ -77,21 +77,21 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
> >   	 * install_special_mapping or the perf counter mmap tracking code
> >   	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
> >   	 */
> > -	mm->context.vdso = (void *)vdso_base;
> > +	mm->context.vdso = (void *)vdso_base + PAGE_SIZE;
> > -	ret =
> > -	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
> > -		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> > -		vdso_pagelist);
> > +	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
> > +		(VM_READ | VM_MAYREAD), &vdso_pagelist[0]);
> >   	if (unlikely(ret)) {
> >   		mm->context.vdso = NULL;
> >   		goto end;
> >   	}
> > -	vdso_base += (vdso_pages << PAGE_SHIFT);
> > -	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
> > -		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
> > +	vdso_base += PAGE_SIZE;
> > +	ret =
> > +	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
> > +		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
> > +		&vdso_pagelist[1]);
> >   	if (unlikely(ret))
> >   		mm->context.vdso = NULL;
> > @@ -105,7 +105,7 @@ const char *arch_vma_name(struct vm_area_struct *vma)
> >   	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
> >   		return "[vdso]";
> >   	if (vma->vm_mm && (vma->vm_start ==
> > -			   (long)vma->vm_mm->context.vdso + PAGE_SIZE))
> > +			   (long)vma->vm_mm->context.vdso - PAGE_SIZE))
> >   		return "[vdso_data]";
> >   	return NULL;
> >   }
> > diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
> > index e6f558bca71b..fd8a31075256 100644
> > --- a/arch/riscv/kernel/vdso/vdso.lds.S
> > +++ b/arch/riscv/kernel/vdso/vdso.lds.S
> > @@ -8,7 +8,7 @@ OUTPUT_ARCH(riscv)
> >   SECTIONS
> >   {
> > -	PROVIDE(_vdso_data = . + PAGE_SIZE);
> > +	PROVIDE(_vdso_data = . - PAGE_SIZE);
> >   	. = SIZEOF_HEADERS;
> >   	.hash		: { *(.hash) }			:text
diff mbox series

Patch

diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 25a3b8849599..0c49390e9be3 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -44,13 +44,13 @@  static int __init vdso_init(void)
 		return -ENOMEM;
 	}
 
+	vdso_pagelist[0] = virt_to_page(vdso_data);
 	for (i = 0; i < vdso_pages; i++) {
 		struct page *pg;
 
 		pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-		vdso_pagelist[i] = pg;
+		vdso_pagelist[i + 1] = pg;
 	}
-	vdso_pagelist[i] = virt_to_page(vdso_data);
 
 	return 0;
 }
@@ -77,21 +77,21 @@  int arch_setup_additional_pages(struct linux_binprm *bprm,
 	 * install_special_mapping or the perf counter mmap tracking code
 	 * will fail to recognise it as a vDSO (since arch_vma_name fails).
 	 */
-	mm->context.vdso = (void *)vdso_base;
+	mm->context.vdso = (void *)vdso_base + PAGE_SIZE;
 
-	ret =
-	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
-		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-		vdso_pagelist);
+	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+		(VM_READ | VM_MAYREAD), &vdso_pagelist[0]);
 
 	if (unlikely(ret)) {
 		mm->context.vdso = NULL;
 		goto end;
 	}
 
-	vdso_base += (vdso_pages << PAGE_SHIFT);
-	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
-		(VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
+	vdso_base += PAGE_SIZE;
+	ret =
+	   install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+		(VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+		&vdso_pagelist[1]);
 
 	if (unlikely(ret))
 		mm->context.vdso = NULL;
@@ -105,7 +105,7 @@  const char *arch_vma_name(struct vm_area_struct *vma)
 	if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
 		return "[vdso]";
 	if (vma->vm_mm && (vma->vm_start ==
-			   (long)vma->vm_mm->context.vdso + PAGE_SIZE))
+			   (long)vma->vm_mm->context.vdso - PAGE_SIZE))
 		return "[vdso_data]";
 	return NULL;
 }
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e6f558bca71b..fd8a31075256 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -8,7 +8,7 @@  OUTPUT_ARCH(riscv)
 
 SECTIONS
 {
-	PROVIDE(_vdso_data = . + PAGE_SIZE);
+	PROVIDE(_vdso_data = . - PAGE_SIZE);
 	. = SIZEOF_HEADERS;
 
 	.hash		: { *(.hash) }			:text