diff mbox

arm64: Support VDSO remapping

Message ID 1448455781-26660-1-git-send-email-cov@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Christopher Covington Nov. 25, 2015, 12:49 p.m. UTC
Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
unmap the VDSO. Add support for this to the AArch64 kernel by copying in
the PowerPC code with slight modifications.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
---
 arch/arm64/include/asm/Kbuild          |  1 -
 arch/arm64/include/asm/mm-arch-hooks.h | 28 ++++++++++++++++++++++++++++
 arch/arm64/include/asm/mmu_context.h   | 23 ++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h

Comments

Will Deacon Dec. 2, 2015, 12:19 p.m. UTC | #1
Hi Christopher,

On Wed, Nov 25, 2015 at 07:49:29AM -0500, Christopher Covington wrote:
> Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
> unmap the VDSO. Add support for this to the AArch64 kernel by copying in
> the PowerPC code with slight modifications.
> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> ---
>  arch/arm64/include/asm/Kbuild          |  1 -
>  arch/arm64/include/asm/mm-arch-hooks.h | 28 ++++++++++++++++++++++++++++
>  arch/arm64/include/asm/mmu_context.h   | 23 ++++++++++++++++++++++-
>  3 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h
> 
> diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
> index 70fd9ff..b112a39 100644
> --- a/arch/arm64/include/asm/Kbuild
> +++ b/arch/arm64/include/asm/Kbuild
> @@ -25,7 +25,6 @@ generic-y += kvm_para.h
>  generic-y += local.h
>  generic-y += local64.h
>  generic-y += mcs_spinlock.h
> -generic-y += mm-arch-hooks.h
>  generic-y += mman.h
>  generic-y += msgbuf.h
>  generic-y += msi.h
> diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
> new file mode 100644
> index 0000000..e6923fb
> --- /dev/null
> +++ b/arch/arm64/include/asm/mm-arch-hooks.h
> @@ -0,0 +1,28 @@
> +/*
> + * Architecture specific mm hooks
> + *
> + * Copyright (C) 2015, IBM Corporation
> + * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef __ASM_MM_ARCH_HOOKS_H
> +#define __ASM_MM_ARCH_HOOKS_H
> +
> +static inline void arch_remap(struct mm_struct *mm,
> +			      unsigned long old_start, unsigned long old_end,
> +			      unsigned long new_start, unsigned long new_end)
> +{
> +	/*
> +	 * mremap() doesn't allow moving multiple vmas so we can limit the
> +	 * check to old_start == vdso_base.
> +	 */
> +	if ((void *)old_start == mm->context.vdso)
> +		mm->context.vdso = (void *)new_start;
> +}
> +#define arch_remap arch_remap
> +
> +#endif /* __ASM_MM_ARCH_HOOKS_H */
> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
> index 2416578..9fe0773 100644
> --- a/arch/arm64/include/asm/mmu_context.h
> +++ b/arch/arm64/include/asm/mmu_context.h
> @@ -24,7 +24,6 @@
>  
>  #include <asm/cacheflush.h>
>  #include <asm/proc-fns.h>
> -#include <asm-generic/mm_hooks.h>
>  #include <asm/cputype.h>
>  #include <asm/pgtable.h>
>  
> @@ -147,4 +146,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
>  #define deactivate_mm(tsk,mm)	do { } while (0)
>  #define activate_mm(prev,next)	switch_mm(prev, next, NULL)
>  
> +static inline void arch_dup_mmap(struct mm_struct *oldmm,
> +				      struct mm_struct *mm)
> +{
> +}
> +
> +static inline void arch_exit_mmap(struct mm_struct *mm)
> +{
> +}
> +
> +static inline void arch_unmap(struct mm_struct *mm,
> +			      struct vm_area_struct *vma,
> +			      unsigned long start, unsigned long end)
> +{
> +	if ((void *)start <= mm->context.vdso && mm->context.vdso < (void *)end)
> +		mm->context.vdso = NULL;
> +}
> +
> +static inline void arch_bprm_mm_init(struct mm_struct *mm,
> +				     struct vm_area_struct *vma)
> +{
> +}

This is virtually identical to the PowerPC implementation, afaict. Any
chance we could move this to core code and define some generic vdso
accessors for the mm?

Will
Christopher Covington Dec. 3, 2015, 6:05 p.m. UTC | #2
On 12/02/2015 07:19 AM, Will Deacon wrote:
> Hi Christopher,
> 
> On Wed, Nov 25, 2015 at 07:49:29AM -0500, Christopher Covington wrote:
>> Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
>> unmap the VDSO. Add support for this to the AArch64 kernel by copying in
>> the PowerPC code with slight modifications.
>>
>> Signed-off-by: Christopher Covington <cov@codeaurora.org>
>> ---
>>  arch/arm64/include/asm/Kbuild          |  1 -
>>  arch/arm64/include/asm/mm-arch-hooks.h | 28 ++++++++++++++++++++++++++++
>>  arch/arm64/include/asm/mmu_context.h   | 23 ++++++++++++++++++++++-
>>  3 files changed, 50 insertions(+), 2 deletions(-)
>>  create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h
>>
>> diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
>> index 70fd9ff..b112a39 100644
>> --- a/arch/arm64/include/asm/Kbuild
>> +++ b/arch/arm64/include/asm/Kbuild
>> @@ -25,7 +25,6 @@ generic-y += kvm_para.h
>>  generic-y += local.h
>>  generic-y += local64.h
>>  generic-y += mcs_spinlock.h
>> -generic-y += mm-arch-hooks.h
>>  generic-y += mman.h
>>  generic-y += msgbuf.h
>>  generic-y += msi.h
>> diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
>> new file mode 100644
>> index 0000000..e6923fb
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/mm-arch-hooks.h
>> @@ -0,0 +1,28 @@
>> +/*
>> + * Architecture specific mm hooks
>> + *
>> + * Copyright (C) 2015, IBM Corporation
>> + * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#ifndef __ASM_MM_ARCH_HOOKS_H
>> +#define __ASM_MM_ARCH_HOOKS_H
>> +
>> +static inline void arch_remap(struct mm_struct *mm,
>> +			      unsigned long old_start, unsigned long old_end,
>> +			      unsigned long new_start, unsigned long new_end)
>> +{
>> +	/*
>> +	 * mremap() doesn't allow moving multiple vmas so we can limit the
>> +	 * check to old_start == vdso_base.
>> +	 */
>> +	if ((void *)old_start == mm->context.vdso)
>> +		mm->context.vdso = (void *)new_start;
>> +}
>> +#define arch_remap arch_remap
>> +
>> +#endif /* __ASM_MM_ARCH_HOOKS_H */
>> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
>> index 2416578..9fe0773 100644
>> --- a/arch/arm64/include/asm/mmu_context.h
>> +++ b/arch/arm64/include/asm/mmu_context.h
>> @@ -24,7 +24,6 @@
>>  
>>  #include <asm/cacheflush.h>
>>  #include <asm/proc-fns.h>
>> -#include <asm-generic/mm_hooks.h>
>>  #include <asm/cputype.h>
>>  #include <asm/pgtable.h>
>>  
>> @@ -147,4 +146,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
>>  #define deactivate_mm(tsk,mm)	do { } while (0)
>>  #define activate_mm(prev,next)	switch_mm(prev, next, NULL)
>>  
>> +static inline void arch_dup_mmap(struct mm_struct *oldmm,
>> +				      struct mm_struct *mm)
>> +{
>> +}
>> +
>> +static inline void arch_exit_mmap(struct mm_struct *mm)
>> +{
>> +}
>> +
>> +static inline void arch_unmap(struct mm_struct *mm,
>> +			      struct vm_area_struct *vma,
>> +			      unsigned long start, unsigned long end)
>> +{
>> +	if ((void *)start <= mm->context.vdso && mm->context.vdso < (void *)end)
>> +		mm->context.vdso = NULL;
>> +}
>> +
>> +static inline void arch_bprm_mm_init(struct mm_struct *mm,
>> +				     struct vm_area_struct *vma)
>> +{
>> +}
> 
> This is virtually identical to the PowerPC implementation, afaict. Any
> chance we could move this to core code and define some generic vdso
> accessors for the mm?

I think the same criticism applies to the VDSO code itself, and I'm not
sure how easy it is to provide common VDSO remap/unmap without common
VDSO code. I guess I'll do some investigating when I have then chance.

Thanks,
Christopher Covington
Christopher Covington April 28, 2016, 3:18 p.m. UTC | #3
Please take a look at the following prototype of sharing the PowerPC
VDSO unmap and remap code with other architectures. I've only hooked
up arm64 to begin with. If folks think this is a reasonable approach I
can work on 32 bit ARM as well. Not hearing back from an earlier
request for guidance [1], I simply dove in and started hacking.
Laurent's test case [2][3] is a compelling illustration of whether VDSO
remap works or not on a given architecture.

1. https://lkml.org/lkml/2016/3/2/225
2. https://lists.openvz.org/pipermail/criu/2015-March/019161.html
3. http://lists.openvz.org/pipermail/criu/attachments/20150318/f02ed9ea/attachment.bin

Thanks,
Christopher Covington
Andy Lutomirski April 28, 2016, 6:53 p.m. UTC | #4
On 04/28/2016 08:18 AM, Christopher Covington wrote:
> Please take a look at the following prototype of sharing the PowerPC
> VDSO unmap and remap code with other architectures. I've only hooked
> up arm64 to begin with. If folks think this is a reasonable approach I
> can work on 32 bit ARM as well. Not hearing back from an earlier
> request for guidance [1], I simply dove in and started hacking.
> Laurent's test case [2][3] is a compelling illustration of whether VDSO
> remap works or not on a given architecture.

I think there's a much nicer way:

https://lkml.kernel.org/r/1461584223-9418-1-git-send-email-dsafonov@virtuozzo.com

Could arm64 and ppc use this approach?  These arch_xyz hooks are gross.

Also, at some point, possibly quite soon, x86 will want a way for user code to ask the kernel to map a specific vdso variant at a specific address.  Could we perhaps add a new pair of syscalls:

struct vdso_info {
    unsigned long space_needed_before;
    unsigned long space_needed_after;
    unsigned long alignment;
};

long vdso_get_info(unsigned int vdso_type, struct vdso_info *info);

long vdso_remap(unsigned int vdso_type, unsigned long addr, unsigned int flags);

#define VDSO_X86_I386 0
#define VDSO_X86_64 1
#define VDSO_X86_X32 2
// etc.

vdso_remap will map the vdso of the chosen type such at AT_SYSINFO_EHDR lines up with addr.  It will use up to space_needed_before bytes before that address and space_needed_after after than address.  It will also unmap the old vdso (or maybe only do that if some flag is set).

On x86, mremap is *not* sufficient for everything that's needed, because some programs will need to change the vdso type.

--Andy
Christopher Covington April 29, 2016, 1:22 p.m. UTC | #5
Hi Andy,

On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
> On 04/28/2016 08:18 AM, Christopher Covington wrote:
>> Please take a look at the following prototype of sharing the PowerPC
>> VDSO unmap and remap code with other architectures. I've only hooked
>> up arm64 to begin with. If folks think this is a reasonable approach I
>> can work on 32 bit ARM as well. Not hearing back from an earlier
>> request for guidance [1], I simply dove in and started hacking.
>> Laurent's test case [2][3] is a compelling illustration of whether VDSO
>> remap works or not on a given architecture.
> 
> I think there's a much nicer way:
> 
> https://lkml.kernel.org/r/1461584223-9418-1-git-send-email-dsafonov@virtuozzo.com
> 
> Could arm64 and ppc use this approach?  These arch_xyz hooks are gross.

Thanks for the pointer. Any thoughts on how to keep essentially
identical definitions of vdso_mremap from proliferating into every
architecture and variant?

> Also, at some point, possibly quite soon, x86 will want a way for
> user code to ask the kernel to map a specific vdso variant at a specific
> address. Could we perhaps add a new pair of syscalls:
> 
> struct vdso_info {
>     unsigned long space_needed_before;
>     unsigned long space_needed_after;
>     unsigned long alignment;
> };
> 
> long vdso_get_info(unsigned int vdso_type, struct vdso_info *info);
> 
> long vdso_remap(unsigned int vdso_type, unsigned long addr, unsigned int flags);
> 
> #define VDSO_X86_I386 0
> #define VDSO_X86_64 1
> #define VDSO_X86_X32 2
> // etc.
> 
> vdso_remap will map the vdso of the chosen type such at
> AT_SYSINFO_EHDR lines up with addr. It will use up to
> space_needed_before bytes before that address and space_needed_after
> after than address. It will also unmap the old vdso (or maybe only do
> that if some flag is set).
> 
> On x86, mremap is *not* sufficient for everything that's needed,
> because some programs will need to change the vdso type.

I don't I understand. Why can't people just exec() the ELF type that
corresponds to the VDSO they want?

Thanks,
Cov
Dmitry Safonov April 29, 2016, 1:55 p.m. UTC | #6
On 04/29/2016 04:22 PM, Christopher Covington wrote:
> On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
>> Also, at some point, possibly quite soon, x86 will want a way for
>> user code to ask the kernel to map a specific vdso variant at a specific
>> address. Could we perhaps add a new pair of syscalls:
>>
>> struct vdso_info {
>>      unsigned long space_needed_before;
>>      unsigned long space_needed_after;
>>      unsigned long alignment;
>> };
>>
>> long vdso_get_info(unsigned int vdso_type, struct vdso_info *info);
>>
>> long vdso_remap(unsigned int vdso_type, unsigned long addr, unsigned int flags);
>>
>> #define VDSO_X86_I386 0
>> #define VDSO_X86_64 1
>> #define VDSO_X86_X32 2
>> // etc.
>>
>> vdso_remap will map the vdso of the chosen type such at
>> AT_SYSINFO_EHDR lines up with addr. It will use up to
>> space_needed_before bytes before that address and space_needed_after
>> after than address. It will also unmap the old vdso (or maybe only do
>> that if some flag is set).
>>
>> On x86, mremap is *not* sufficient for everything that's needed,
>> because some programs will need to change the vdso type.
> I don't I understand. Why can't people just exec() the ELF type that
> corresponds to the VDSO they want?

I may say about my needs in it: to not lose all the existing
information in application.
Imagine you're restoring a container with 64-bit and 32-bit
applications (in compatible mode). So you need somehow
switch vdso type in restorer for a 32-bit application.
Yes, you may exec() and then - all already restored application
properties will got lost. You will need to transpher information
about mappings, make protocol between restorer binary
and main criu application, finally you'll end up with some
really much more difficult architecture than it is now.
And it'll be slower.

Also it's pretty logical: if one can switch between modes,
why can't he change vdso mapping to mode he got to?
(note: if the work about removing thread compatible flags
will be done (on x86), there will not even be such a thing,
as application mode - just difference on which syscalls it
uses: compatible or native).

Thanks,
     Dmitry Safonov
Christopher Covington May 3, 2016, 9:37 p.m. UTC | #7
On 04/29/2016 09:55 AM, Dmitry Safonov wrote:
> On 04/29/2016 04:22 PM, Christopher Covington wrote:
>> On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
>>> Also, at some point, possibly quite soon, x86 will want a way for
>>> user code to ask the kernel to map a specific vdso variant at a specific
>>> address. Could we perhaps add a new pair of syscalls:
>>>
>>> struct vdso_info {
>>>      unsigned long space_needed_before;
>>>      unsigned long space_needed_after;
>>>      unsigned long alignment;
>>> };
>>>
>>> long vdso_get_info(unsigned int vdso_type, struct vdso_info *info);
>>>
>>> long vdso_remap(unsigned int vdso_type, unsigned long addr, unsigned
>>> int flags);
>>>
>>> #define VDSO_X86_I386 0
>>> #define VDSO_X86_64 1
>>> #define VDSO_X86_X32 2
>>> // etc.
>>>
>>> vdso_remap will map the vdso of the chosen type such at
>>> AT_SYSINFO_EHDR lines up with addr. It will use up to
>>> space_needed_before bytes before that address and space_needed_after
>>> after than address. It will also unmap the old vdso (or maybe only do
>>> that if some flag is set).
>>>
>>> On x86, mremap is *not* sufficient for everything that's needed,
>>> because some programs will need to change the vdso type.
>> I don't I understand. Why can't people just exec() the ELF type that
>> corresponds to the VDSO they want?
> 
> I may say about my needs in it: to not lose all the existing
> information in application.
> Imagine you're restoring a container with 64-bit and 32-bit
> applications (in compatible mode). So you need somehow
> switch vdso type in restorer for a 32-bit application.
> Yes, you may exec() and then - all already restored application
> properties will got lost. You will need to transpher information
> about mappings, make protocol between restorer binary
> and main criu application, finally you'll end up with some
> really much more difficult architecture than it is now.
> And it'll be slower.

Perhaps a more modest exec based strategy would be for x86_64 criu to
handle all of the x86_64 restores as usual but exec i386 and/or x32 criu
service daemons and use them for restoring applications needing those ABIs.

Regards,
Christopher Covington
diff mbox

Patch

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 70fd9ff..b112a39 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -25,7 +25,6 @@  generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += msi.h
diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
new file mode 100644
index 0000000..e6923fb
--- /dev/null
+++ b/arch/arm64/include/asm/mm-arch-hooks.h
@@ -0,0 +1,28 @@ 
+/*
+ * Architecture specific mm hooks
+ *
+ * Copyright (C) 2015, IBM Corporation
+ * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MM_ARCH_HOOKS_H
+#define __ASM_MM_ARCH_HOOKS_H
+
+static inline void arch_remap(struct mm_struct *mm,
+			      unsigned long old_start, unsigned long old_end,
+			      unsigned long new_start, unsigned long new_end)
+{
+	/*
+	 * mremap() doesn't allow moving multiple vmas so we can limit the
+	 * check to old_start == vdso_base.
+	 */
+	if ((void *)old_start == mm->context.vdso)
+		mm->context.vdso = (void *)new_start;
+}
+#define arch_remap arch_remap
+
+#endif /* __ASM_MM_ARCH_HOOKS_H */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 2416578..9fe0773 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -24,7 +24,6 @@ 
 
 #include <asm/cacheflush.h>
 #include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
@@ -147,4 +146,26 @@  switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #define deactivate_mm(tsk,mm)	do { } while (0)
 #define activate_mm(prev,next)	switch_mm(prev, next, NULL)
 
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+				      struct mm_struct *mm)
+{
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+			      struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+	if ((void *)start <= mm->context.vdso && mm->context.vdso < (void *)end)
+		mm->context.vdso = NULL;
+}
+
+static inline void arch_bprm_mm_init(struct mm_struct *mm,
+				     struct vm_area_struct *vma)
+{
+}
+
 #endif