
[v18,03/13] arm64/kexec: Add core kexec support

Message ID 84e50e4043a2061e0f5d9273f55e158c59754af0.1465502767.git.geoff@infradead.org (mailing list archive)
State New, archived

Commit Message

Geoff Levand June 9, 2016, 8:08 p.m. UTC
Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec reboot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |  10 ++
 arch/arm64/include/asm/kexec.h      |  48 ++++++++++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/machine_kexec.c   | 185 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 131 +++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 377 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

Comments

James Morse June 15, 2016, 5:10 p.m. UTC | #1
Hi Geoff,

Looks good, I have a few observations and questions below.

On 09/06/16 21:08, Geoff Levand wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> arm64 architecture that add support for the kexec reboot mechanism
> (CONFIG_KEXEC) on arm64 platforms.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>

> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> new file mode 100644
> index 0000000..05f7c21
> --- /dev/null
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -0,0 +1,185 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/highmem.h>

We don't have/need highmem on arm64. The kmap()/kunmap() calls just obscure what
is going on.
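
With highmem gone, the lookup could presumably collapse to a plain linear-map
conversion, something like (a sketch, not the posted code):

	void *addr = phys_to_virt(*entry & PAGE_MASK);

and the matching kunmap() would just go away.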


> +#include <linux/kexec.h>
> +#include <linux/of_fdt.h>

What do you need of_fdt.h for? I guess this should be in patch 4.


> +#include <linux/slab.h>

The control page was already allocated, and I can't see anything else being
allocated... What do you need slab.h for?


> +#include <linux/smp.h>
> +#include <linux/uaccess.h>

User space access? I guess this should be in patch 4.


> +
> +#include <asm/cacheflush.h>
> +#include <asm/cpu_ops.h>
> +#include <asm/mmu_context.h>
> +#include <asm/system_misc.h>

I can't see anything in system_misc.h that you are using in here.


> +
> +#include "cpu-reset.h"
> +
> +/* Global variables for the arm64_relocate_new_kernel routine. */
> +extern const unsigned char arm64_relocate_new_kernel[];
> +extern const unsigned long arm64_relocate_new_kernel_size;
> +
> +static unsigned long kimage_start;
> +
> +void machine_kexec_cleanup(struct kimage *kimage)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> +
> +/**
> + * machine_kexec_prepare - Prepare for a kexec reboot.
> + *
> + * Called from the core kexec code when a kernel image is loaded.
> + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
> + * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
> + */
> +int machine_kexec_prepare(struct kimage *kimage)
> +{
> +	kimage_start = kimage->start;
> +
> +	if (kimage->type != KEXEC_TYPE_CRASH) {
> +		if (cpus_are_stuck_in_kernel()) {
> +			pr_err("Can't kexec: failed CPUs are stuck in the kernel.\n");
> +			return -EBUSY;
> +		}
> +
> +		if (num_online_cpus() > 1) {
> +#ifdef CONFIG_HOTPLUG_CPU
> +			/* any_cpu as we don't mind being preempted */
> +			int any_cpu = raw_smp_processor_id();
> +
> +			if (cpu_ops[any_cpu]->cpu_die)
> +				return 0;
> +#endif /* CONFIG_HOTPLUG_CPU */
> +
> +			pr_err("Can't kexec: no mechanism to offline secondary CPUs.\n");
> +			return -EBUSY;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * kexec_list_flush - Helper to flush the kimage list to PoC.
> + */
> +static void kexec_list_flush(struct kimage *kimage)
> +{
> +	kimage_entry_t *entry;
> +	unsigned int flag;
> +
> +	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
> +		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
> +
> +		flag = *entry & IND_FLAGS;
> +
> +		switch (flag) {
> +		case IND_INDIRECTION:
> +			entry = (kimage_entry_t *)addr - 1;

This '-1' is so that entry points before the first entry of the new table,
and is undone by the entry++ next time round the loop...
If I'm right, could you add a comment to that effect? It took me a little while
to work out!

kexec_core.c has a snazzy macro, for_each_kimage_entry(); it's a shame it's not
in a header file.
This loop does the same but with two variables instead of three. These
IND_INDIRECTION pages only appear at the end of a list, so this list-walking
looks correct.
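
For reference, the macro in kernel/kexec_core.c looks roughly like this
(quoted from memory, so treat it as approximate):

	#define for_each_kimage_entry(image, ptr, entry) \
		for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
			ptr = (entry & IND_INDIRECTION) ? \
				phys_to_virt((entry & PAGE_MASK)) : ptr + 1)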


> +			__flush_dcache_area(addr, PAGE_SIZE);

So if we find an indirection pointer, we switch entry to the new page, and clean
it to the PoC, because later we walk this list with the MMU off.

But what cleans the very first page?


> +			break;
> +		case IND_DESTINATION:
> +			break;
> +		case IND_SOURCE:
> +			__flush_dcache_area(addr, PAGE_SIZE);
> +			break;
> +		case IND_DONE:
> +			break;
> +		default:
> +			BUG();

Unless you think it's less readable, you could group the clauses together:

> 		case IND_INDIRECTION:
> 			entry = (kimage_entry_t *)addr - 1;
> 		case IND_SOURCE:
> 			__flush_dcache_area(addr, PAGE_SIZE);
> 		case IND_DESTINATION:
> 		case IND_DONE:
> 			break;


> +		}
> +		kunmap(addr);
> +	}
> +}
> +
> +/**
> + * kexec_segment_flush - Helper to flush the kimage segments to PoC.
> + */
> +static void kexec_segment_flush(const struct kimage *kimage)
> +{
> +	unsigned long i;
> +
> +	pr_debug("%s:\n", __func__);
> +
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
> +			i,
> +			kimage->segment[i].mem,
> +			kimage->segment[i].mem + kimage->segment[i].memsz,
> +			kimage->segment[i].memsz,
> +			kimage->segment[i].memsz /  PAGE_SIZE);
> +
> +		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
> +			kimage->segment[i].memsz);
> +	}
> +}
> +
> +/**
> + * machine_kexec - Do the kexec reboot.
> + *
> + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> + */
> +void machine_kexec(struct kimage *kimage)
> +{
> +	phys_addr_t reboot_code_buffer_phys;
> +	void *reboot_code_buffer;
> +
> +	/*
> +	 * New cpus may have become stuck_in_kernel after we loaded the image.
> +	 */
> +	BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1));
> +
> +	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
> +	reboot_code_buffer = kmap(kimage->control_code_page);
> +
> +	/*
> +	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
> +	 * after the kernel is shut down.
> +	 */
> +	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
> +		arm64_relocate_new_kernel_size);
> +
> +	/* Flush the reboot_code_buffer in preparation for its execution. */
> +	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
> +	flush_icache_range((uintptr_t)reboot_code_buffer,
> +		(uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size);
> +
> +	/* Flush the kimage list. */
> +	kexec_list_flush(kimage);
> +
> +	/* Flush the new image if already in place. */
> +	if (kimage->head & IND_DONE)
> +		kexec_segment_flush(kimage);
> +
> +	pr_info("Bye!\n");
> +
> +	/* Disable all DAIF exceptions. */
> +	asm volatile ("msr daifset, #0xf" : : : "memory");
> +
> +	/*
> +	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
> +	 * transfer control to the reboot_code_buffer which contains a copy of
> +	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
> +	 * uses physical addressing to relocate the new image to its final
> +	 * position and transfers control to the image entry point when the
> +	 * relocation is complete.
> +	 */
> +
> +	cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
> +		kimage_start, 0);
> +
> +	BUG(); /* Should never get here. */
> +}
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> new file mode 100644
> index 0000000..e380db3
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,131 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +
> +#include <asm/assembler.h>
> +#include <asm/kexec.h>
> +#include <asm/page.h>
> +#include <asm/sysreg.h>
> +
> +/*
> + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when copying the
> + * new image to its final location.  To ensure that the
> + * arm64_relocate_new_kernel routine which does that copy is not overwritten,
> + * all code and data needed by arm64_relocate_new_kernel must be between the
> + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
> + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
> + * control_code_page, a special page which has been set up to be preserved
> + * during the copy operation.
> + */
> +.globl arm64_relocate_new_kernel
> +arm64_relocate_new_kernel:

All the other asm functions use ENTRY(), which would do the .globl and alignment
for you. (You would need an ENDPROC(arm64_relocate_new_kernel) too.)
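
That is, something like this sketch:

	ENTRY(arm64_relocate_new_kernel)
		/* ... body unchanged ... */
	ENDPROC(arm64_relocate_new_kernel)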


> +
> +	/* Setup the list loop variables. */
> +	mov	x18, x1				/* x18 = kimage_start */
> +	mov	x17, x0				/* x17 = kimage_head */
> +	dcache_line_size x16, x0		/* x16 = dcache line size */
> +	mov	x15, xzr			/* x15 = segment start */

What uses this 'segment start'?


> +	mov	x14, xzr			/* x14 = entry ptr */
> +	mov	x13, xzr			/* x13 = copy dest */
> +
> +	/* Clear the sctlr_el2 flags. */
> +	mrs	x0, CurrentEL
> +	cmp	x0, #CurrentEL_EL2
> +	b.ne	1f
> +	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_ELx_FLAGS
> +	bic	x0, x0, x1
> +	msr	sctlr_el2, x0
> +	isb
> +1:
> +
> +	/* Check if the new image needs relocation. */
> +	cbz	x17, .Ldone

Does this happen? Do we ever come across an empty slot in the tables?

kimage_terminate() adds the IND_DONE entry, so we should never see an empty
slot. kexec_list_flush() would BUG() on this too, and we call that
unconditionally on the way in here.
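
For reference, kimage_terminate() in kernel/kexec_core.c is roughly:

	void kimage_terminate(struct kimage *image)
	{
		if (*image->entry != 0)
			image->entry++;

		*image->entry = IND_DONE;
	}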


> +	tbnz	x17, IND_DONE_BIT, .Ldone
> +
> +.Lloop:
> +	and	x12, x17, PAGE_MASK		/* x12 = addr */
> +
> +	/* Test the entry flags. */
> +.Ltest_source:
> +	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
> +
> +	/* Invalidate dest page to PoC. */
> +	mov     x0, x13
> +	add     x20, x0, #PAGE_SIZE
> +	sub     x1, x16, #1
> +	bic     x0, x0, x1
> +2:	dc      ivac, x0

This relies on an IND_DESTINATION being found first for x13 to be set to
something other than 0. I guess if kexec-core hands us a broken list, all bets
are off!


> +	add     x0, x0, x16
> +	cmp     x0, x20
> +	b.lo    2b
> +	dsb     sy
> +
> +	mov x20, x13
> +	mov x21, x12
> +	copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
> +
> +	/* dest += PAGE_SIZE */
> +	add	x13, x13, PAGE_SIZE
> +	b	.Lnext
> +
> +.Ltest_indirection:
> +	tbz	x17, IND_INDIRECTION_BIT, .Ltest_destination
> +
> +	/* ptr = addr */
> +	mov	x14, x12
> +	b	.Lnext
> +
> +.Ltest_destination:
> +	tbz	x17, IND_DESTINATION_BIT, .Lnext
> +
> +	mov	x15, x12

What uses this 'segment start'?


> +
> +	/* dest = addr */
> +	mov	x13, x12
> +
> +.Lnext:
> +	/* entry = *ptr++ */
> +	ldr	x17, [x14], #8
> +
> +	/* while (!(entry & DONE)) */
> +	tbz	x17, IND_DONE_BIT, .Lloop
> +
> +.Ldone:

        /* wait for writes from copy_page to finish */
> +	dsb	nsh
> +	ic	iallu
> +	dsb	nsh
> +	isb
> +
> +	/* Start new image. */
> +	mov	x0, xzr
> +	mov	x1, xzr
> +	mov	x2, xzr
> +	mov	x3, xzr
> +	br	x18
> +
> +.ltorg
> +
> +.align 3	/* To keep the 64-bit values below naturally aligned. */
> +
> +.Lcopy_end:
> +.org	KEXEC_CONTROL_PAGE_SIZE

Why do we need to pad up to KEXEC_CONTROL_PAGE_SIZE?
In machine_kexec() we only copy arm64_relocate_new_kernel_size bytes, so it
shouldn't matter what is here. As far as I can see we don't even access it.


> +
> +/*
> + * arm64_relocate_new_kernel_size - Number of bytes to copy to the
> + * control_code_page.
> + */
> +.globl arm64_relocate_new_kernel_size
> +arm64_relocate_new_kernel_size:
> +	.quad	.Lcopy_end - arm64_relocate_new_kernel


Thanks,

James
Geoff Levand June 16, 2016, 10:41 p.m. UTC | #2
On Wed, 2016-06-15 at 18:10 +0100, James Morse wrote:
> On 09/06/16 21:08, Geoff Levand wrote:
> > +++ b/arch/arm64/kernel/machine_kexec.c
> > @@ -0,0 +1,185 @@
> > +/*
> > + * kexec for arm64
> > + *
> > + * Copyright (C) Linaro.
> > + * Copyright (C) Huawei Futurewei Technologies.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/highmem.h>
> 
> We don't have/need highmem on arm64. The kmap()/kunmap() calls just obscure what
> is going on.
> 
> 
> > +#include <linux/kexec.h>
> > +#include <linux/of_fdt.h>
> 
> What do you need of_fdt.h for? I guess this should be in patch 4.
> 
> 
> > +#include <linux/slab.h>
> 
> The control page was already allocated, I can't see anything else being
> allocated... What do you need slab.h for?
> 
> 
> > +#include <linux/smp.h>
> > +#include <linux/uaccess.h>
> 
> User space access? I guess this should be in patch 4.
> 
> 
> > +
> > +#include <asm/cacheflush.h>
> > +#include <asm/cpu_ops.h>
> > +#include <asm/mmu_context.h>
> > +#include <asm/system_misc.h>
> 
> I can't see anything in system_misc.h that you are using in here.

I cleaned up all these includes.

> > + * kexec_list_flush - Helper to flush the kimage list to PoC.
> > + */
> > +static void kexec_list_flush(struct kimage *kimage)
> > +{
> > +	kimage_entry_t *entry;
> > +	unsigned int flag;
> > +
> > +	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
> > +		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
> > +
> > +		flag = *entry & IND_FLAGS;
> > +
> > +		switch (flag) {
> > +		case IND_INDIRECTION:
> > +			entry = (kimage_entry_t *)addr - 1;
> 
> This '-1' is so that entry points before the first entry of the new table,
> and is un-done by entry++ next time round the loop...
> If I'm right, could you add a comment to that effect? It took me a little while
> to work out!

I added a comment.

> kexec_core.c has a snazzy macro: for_each_kimage_entry(), its a shame its not in
> a header file.
> This loop does the same but with two variables instead of three. These
> IN_INDIRECTION pages only appear at the end of a list, this list-walking looks
> correct.
> 
> 
> > +			__flush_dcache_area(addr, PAGE_SIZE);
> 
> So if we find an indirection pointer, we switch entry to the new page, and clean
> it to the PoC, because later we walk this list with the MMU off.
> 
> But what cleans the very first page?

I don't think this routine was doing quite the right thing.  The
arm64_relocate_new_kernel routine uses the list (the entries), and
the second stage kernel buffers (the IND_SOURCE pages). Those two things
are what should be flushed here.
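
A sketch of such a rework, flushing each list entry itself plus the IND_SOURCE
pages (based on the description above rather than the actual follow-up patch,
so details may differ):

	static void kexec_list_flush(struct kimage *kimage)
	{
		kimage_entry_t *entry;

		for (entry = &kimage->head; ; entry++) {
			unsigned int flag;
			void *addr;

			/* Flush each list entry itself to the PoC. */
			__flush_dcache_area(entry, sizeof(kimage_entry_t));

			flag = *entry & IND_FLAGS;
			if (flag == IND_DONE)
				break;

			addr = phys_to_virt(*entry & PAGE_MASK);

			switch (flag) {
			case IND_INDIRECTION:
				/* Step to just before the new table;
				 * the entry++ above undoes the -1. */
				entry = (kimage_entry_t *)addr - 1;
				break;
			case IND_SOURCE:
				/* Flush the second stage kernel buffer. */
				__flush_dcache_area(addr, PAGE_SIZE);
				break;
			case IND_DESTINATION:
				break;
			default:
				BUG();
			}
		}
	}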

> > +			break;
> > +		case IND_DESTINATION:
> > +			break;
> > +		case IND_SOURCE:
> > +			__flush_dcache_area(addr, PAGE_SIZE);
> > +			break;
> > +		case IND_DONE:
> > +			break;
> > +		default:
> > +			BUG();
> 
> Unless you think its less readable, you could group the clauses together:

Takahiro found a bug when CONFIG_SPARSEMEM_VMEMMAP=n, and this code
has now been reworked.

> > 		case IND_INDIRECTION:
> > 			entry = (kimage_entry_t *)addr - 1;
> > 		case IND_SOURCE:
> > 			__flush_dcache_area(addr, PAGE_SIZE);
> > 		case IND_DESTINATION:
> > 		case IND_DONE:
> > 			break;
> > diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> > new file mode 100644
> > index 0000000..e380db3
> > --- /dev/null
> > +++ b/arch/arm64/kernel/relocate_kernel.S
> > @@ -0,0 +1,131 @@
> > +.globl arm64_relocate_new_kernel
> > +arm64_relocate_new_kernel:
> 
> All the other asm functions use ENTRY(), which would do the .globl and alignment
> for you. (You would need a ENDPROC(arm64_relocate_new_kernel) too.)

Sure.

> > +
> > +	/* Setup the list loop variables. */
> > +	mov	x18, x1				/* x18 = kimage_start */
> > +	mov	x17, x0				/* x17 = kimage_head */
> > +	dcache_line_size x16, x0		/* x16 = dcache line size */
> > +	mov	x15, xzr			/* x15 = segment start */
> 
> What uses this 'segment start'?

That is left over from when we booted without purgatory (as
the arm arch does).

> > +	mov	x14, xzr			/* x14 = entry ptr */
> > +	mov	x13, xzr			/* x13 = copy dest */
> > +
> > +	/* Clear the sctlr_el2 flags. */
> > +	mrs	x0, CurrentEL
> > +	cmp	x0, #CurrentEL_EL2
> > +	b.ne	1f
> > +	mrs	x0, sctlr_el2
> > +	ldr	x1, =SCTLR_ELx_FLAGS
> > +	bic	x0, x0, x1
> > +	msr	sctlr_el2, x0
> > +	isb
> > +1:
> > +
> > +	/* Check if the new image needs relocation. */
> > +	cbz	x17, .Ldone
> 
> Does this happen? Do we ever come across an empty slot in the tables?
> 
> kimage_terminate() adds the IND_DONE entry, so we should never see an empty
> slot. kexec_list_flush() would BUG() on this too, and we call that
> unconditionally on the way in here.

I put that in just in case, but never checked if it would
ever actually happen.  I can take it out.

> > +	tbnz	x17, IND_DONE_BIT, .Ldone
> > +
> > +.Lloop:
> > +	and	x12, x17, PAGE_MASK		/* x12 = addr */
> > +
> > +	/* Test the entry flags. */
> > +.Ltest_source:
> > +	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
> > +
> > +	/* Invalidate dest page to PoC. */
> > +	mov     x0, x13
> > +	add     x20, x0, #PAGE_SIZE
> > +	sub     x1, x16, #1
> > +	bic     x0, x0, x1
> > +2:	dc      ivac, x0
> 
> This relies on an IND_DESTINATION being found first for x13 to be set to
> something other than 0. I guess if kexec-core hands us a broken list, all bets
> are off!

Yes, assumed to be IND_DESTINATION.

> > +
> > +.Ldone:
> 
>         /* wait for writes from copy_page to finish */

Added.

> > +	dsb	nsh
> > +	ic	iallu
> > +	dsb	nsh
> > +	isb
> > +
> > +	/* Start new image. */
> > +	mov	x0, xzr
> > +	mov	x1, xzr
> > +	mov	x2, xzr
> > +	mov	x3, xzr
> > +	br	x18
> > +
> > +.ltorg
> > +
> > +.align 3	/* To keep the 64-bit values below naturally aligned. */
> > +
> > +.Lcopy_end:
> > +.org	KEXEC_CONTROL_PAGE_SIZE
> 
> Why do we need to pad up to KEXEC_CONTROL_PAGE_SIZE?
> In machine_kexec() we only copy arm64_relocate_new_kernel_size bytes, so it
> shouldn't matter what is here. As far as I can see we don't even access it.

This is to check if arm64_relocate_new_kernel gets too
big.  The assembler should give an error if the location
counter is set backwards.
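
In other words the .org is a build-time size guard, roughly:

	.Lcopy_end:
	.org	KEXEC_CONTROL_PAGE_SIZE

If the code before .Lcopy_end outgrows KEXEC_CONTROL_PAGE_SIZE, the .org
target falls behind the location counter and gas fails with something like
"Error: attempt to move .org backwards".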

-Geoff

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5a0a691..330786d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -664,6 +664,16 @@  config PARAVIRT_TIME_ACCOUNTING
 
 	  If in doubt, say N here.
 
+config KEXEC
+	depends on PM_SLEEP_SMP
+	select KEXEC_CORE
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..04744dc
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,48 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE 4096
+
+#define KEXEC_ARCH KEXEC_ARCH_AARCH64
+
+#ifndef __ASSEMBLY__
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2173149..7700c0c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -46,6 +46,8 @@  arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 arm64-obj-$(CONFIG_PARAVIRT)		+= paravirt.o
 arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
+					   cpu-reset.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..05f7c21
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,185 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/highmem.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/mmu_context.h>
+#include <asm/system_misc.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+static unsigned long kimage_start;
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
+ * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	kimage_start = kimage->start;
+
+	if (kimage->type != KEXEC_TYPE_CRASH) {
+		if (cpus_are_stuck_in_kernel()) {
+			pr_err("Can't kexec: failed CPUs are stuck in the kernel.\n");
+			return -EBUSY;
+		}
+
+		if (num_online_cpus() > 1) {
+#ifdef CONFIG_HOTPLUG_CPU
+			/* any_cpu as we don't mind being preempted */
+			int any_cpu = raw_smp_processor_id();
+
+			if (cpu_ops[any_cpu]->cpu_die)
+				return 0;
+#endif /* CONFIG_HOTPLUG_CPU */
+
+			pr_err("Can't kexec: no mechanism to offline secondary CPUs.\n");
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+static void kexec_list_flush(struct kimage *kimage)
+{
+	kimage_entry_t *entry;
+	unsigned int flag;
+
+	for (entry = &kimage->head, flag = 0; flag != IND_DONE; entry++) {
+		void *addr = kmap(phys_to_page(*entry & PAGE_MASK));
+
+		flag = *entry & IND_FLAGS;
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (kimage_entry_t *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DONE:
+			break;
+		default:
+			BUG();
+		}
+		kunmap(addr);
+	}
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_debug("%s:\n", __func__);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE);
+
+		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+			kimage->segment[i].memsz);
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	/*
+	 * New cpus may have become stuck_in_kernel after we loaded the image.
+	 */
+	BUG_ON(cpus_are_stuck_in_kernel() && (num_online_cpus() > 1));
+
+	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+	reboot_code_buffer = kmap(kimage->control_code_page);
+
+	/*
+	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+	flush_icache_range((uintptr_t)reboot_code_buffer,
+		(uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size);
+
+	/* Flush the kimage list. */
+	kexec_list_flush(kimage);
+
+	/* Flush the new image if already in place. */
+	if (kimage->head & IND_DONE)
+		kexec_segment_flush(kimage);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	/*
+	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
+	 * uses physical addressing to relocate the new image to its final
+	 * position and transfers control to the image entry point when the
+	 * relocation is complete.
+	 */
+
+	cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head,
+		kimage_start, 0);
+
+	BUG(); /* Should never get here. */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..e380db3
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,131 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/page.h>
+#include <asm/sysreg.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To ensure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+.globl arm64_relocate_new_kernel
+arm64_relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+	mov	x18, x1				/* x18 = kimage_start */
+	mov	x17, x0				/* x17 = kimage_head */
+	dcache_line_size x16, x0		/* x16 = dcache line size */
+	mov	x15, xzr			/* x15 = segment start */
+	mov	x14, xzr			/* x14 = entry ptr */
+	mov	x13, xzr			/* x13 = copy dest */
+
+	/* Clear the sctlr_el2 flags. */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_ELx_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el2, x0
+	isb
+1:
+
+	/* Check if the new image needs relocation. */
+	cbz	x17, .Ldone
+	tbnz	x17, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x12, x17, PAGE_MASK		/* x12 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x17, IND_SOURCE_BIT, .Ltest_indirection
+
+	/* Invalidate dest page to PoC. */
+	mov     x0, x13
+	add     x20, x0, #PAGE_SIZE
+	sub     x1, x16, #1
+	bic     x0, x0, x1
+2:	dc      ivac, x0
+	add     x0, x0, x16
+	cmp     x0, x20
+	b.lo    2b
+	dsb     sy
+
+	mov x20, x13
+	mov x21, x12
+	copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7
+
+	/* dest += PAGE_SIZE */
+	add	x13, x13, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x17, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+	mov	x14, x12
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x17, IND_DESTINATION_BIT, .Lnext
+
+	mov	x15, x12
+
+	/* dest = addr */
+	mov	x13, x12
+
+.Lnext:
+	/* entry = *ptr++ */
+	ldr	x17, [x14], #8
+
+	/* while (!(entry & DONE)) */
+	tbz	x17, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	dsb	nsh
+	ic	iallu
+	dsb	nsh
+	isb
+
+	/* Start new image. */
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x18
+
+.ltorg
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org	KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+	.quad	.Lcopy_end - arm64_relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e5..aae5ebf 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@ 
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_AARCH64 (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16