diff mbox

[v12,08/16] arm64/kexec: Add core kexec support

Message ID 6ac232ad37d6b02cf2d5848b15236f26f5ac61ac.1448403503.git.geoff@infradead.org (mailing list archive)
State New, archived
Headers show

Commit Message

Geoff Levand Nov. 24, 2015, 10:25 p.m. UTC
Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
arm64 architecture that add support for the kexec re-boot mechanism
(CONFIG_KEXEC) on arm64 platforms.

Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 arch/arm64/Kconfig                  |  10 +++
 arch/arm64/include/asm/kexec.h      |  48 ++++++++++++
 arch/arm64/kernel/Makefile          |   2 +
 arch/arm64/kernel/machine_kexec.c   | 152 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 131 +++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 6 files changed, 344 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

Comments

Pratyush Anand Nov. 27, 2015, 1:13 p.m. UTC | #1
Hi Geoff,

On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> +	/* Test the entry flags. */
> +.Ltest_source:
> +	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
> +
> +	mov x20, x13				/*  x20 = copy dest */
> +	mov x21, x12				/*  x21 = copy src */

Till v10 we had here invalidation for relocated destination page to PoC. I could
not understand, why it was removed. Removing that piece of code breaks kexec
booting with mustang. I need [1] to kexec boot into second kernel with mustang
platform.

[1]
https://github.com/pratyushanand/linux/commit/431e3247391981a1e8b2864a83a5743e8a274cb9

~Pratyush
Geoff Levand Nov. 30, 2015, 6:51 p.m. UTC | #2
Hi,

On Fri, 2015-11-27 at 18:43 +0530, Pratyush Anand wrote:

> On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> > +> > 	> > /* Test the entry flags. */
> > +.Ltest_source:
> > +> > 	> > tbz> > 	> > x18, IND_SOURCE_BIT, .Ltest_indirection
> > +
> > +> > 	> > mov x20, x13> > 	> > 	> > 	> > 	> > /*  x20 = copy dest */
> > +> > 	> > mov x21, x12> > 	> > 	> > 	> > 	> > /*  x21 = copy src */
> 
> Till v10 we had here invalidation for relocated destination page to PoC. I could
> not understand, why it was removed. Removing that piece of code breaks kexec
> booting with mustang. I need [1] to kexec boot into second kernel with mustang
> platform.

We need to flush the new kernel to PoC.  The code that was here that
was doing that would only be executed when the new kernel needed
relocation (the standard kexec case).  We also need to flush kernels
that do not need relocation (the standard kdump case).

I moved the new kernel flush to kexec_segment_flush(), called
unconditionally in machine_kexec() so we can handle both cases
with one piece of code.

Have you experienced a problem on mustang with the current version?

-Geoff
Pratyush Anand Dec. 1, 2015, 2:16 a.m. UTC | #3
Hi Geoff,

On 30/11/2015:10:51:15 AM, Geoff Levand wrote:
> Hi,
> 
> On Fri, 2015-11-27 at 18:43 +0530, Pratyush Anand wrote:
> 
> > On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> > > +> > 	> > /* Test the entry flags. */
> > > +.Ltest_source:
> > > +> > 	> > tbz> > 	> > x18, IND_SOURCE_BIT, .Ltest_indirection
> > > +
> > > +> > 	> > mov x20, x13> > 	> > 	> > 	> > 	> > /*  x20 = copy dest */
> > > +> > 	> > mov x21, x12> > 	> > 	> > 	> > 	> > /*  x21 = copy src */
> > 
> > Till v10 we had here invalidation for relocated destination page to PoC. I could
> > not understand, why it was removed. Removing that piece of code breaks kexec
> > booting with mustang. I need [1] to kexec boot into second kernel with mustang
> > platform.
> 
> We need to flush the new kernel to PoC.  The code that was here that
> was doing that would only be executed when the new kernel needed
> relocation (the standard kexec case).  We also need to flush kernels
> that do not need relocation (the standard kdump case).
> 
> I moved the new kernel flush to kexec_segment_flush(), called
> unconditionally in machine_kexec() so we can handle both cases
> with one piece of code.

Yes, I had noticed that. Actually flushing before the cache is disabled can always
cause a heisenbug.

> 
> Have you experienced a problem on mustang with the current version?

Yes, v10 works fine, but I need invalidation fix for both v11 and v12 to work on
mustang.
I have not tested vanilla v12 on seattle, but v12 ported on rhelsa needs
invalidation fix to work on seattle as well.

~Pratyush
Azriel Samson Dec. 1, 2015, 6:32 p.m. UTC | #4
Hi,

I tested with v11 on our platform and I too needed the invalidation fix.

Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, 
a Linux Foundation Collaborative Project

-----Original Message-----
From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of Pratyush
Anand
Sent: Monday, November 30, 2015 7:16 PM
To: Geoff Levand
Cc: Mark Rutland; marc.zyngier@arm.com; Catalin Marinas; Will Deacon; AKASHI
Takahiro; linux-arm-kernel@lists.infradead.org; Dave Young;
kexec@lists.infradead.org; christoffer.dall@linaro.org
Subject: Re: [PATCH v12 08/16] arm64/kexec: Add core kexec support

Hi Geoff,

On 30/11/2015:10:51:15 AM, Geoff Levand wrote:
> Hi,
> 
> On Fri, 2015-11-27 at 18:43 +0530, Pratyush Anand wrote:
> 
> > On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> > > +> > 	> > /* Test the entry flags. */
> > > +.Ltest_source:
> > > +> > 	> > tbz> > 	> > x18, IND_SOURCE_BIT, .Ltest_indirection
> > > +
> > > +> > 	> > mov x20, x13> > 	> > 	> > 	> > 	> > /*  x20
= copy dest */
> > > +> > 	> > mov x21, x12> > 	> > 	> > 	> > 	> > /*  x21
= copy src */
> > 
> > Till v10 we had here invalidation for relocated destination page to 
> > PoC. I could not understand, why it was removed. Removing that piece 
> > of code breaks kexec booting with mustang. I need [1] to kexec boot 
> > into second kernel with mustang platform.
> 
> We need to flush the new kernel to PoC.  The code that was here that 
> was doing that would only be executed when the new kernel needed 
> relocation (the standard kexec case).  We also need to flush kernels 
> that do not need relocation (the standard kdump case).
> 
> I moved the new kernel flush to kexec_segment_flush(), called 
> unconditionally in machine_kexec() so we can handle both cases with 
> one piece of code.

Yes, I had noticed that. Actually flushing before cache is disabled can
always cause heisenbug.

> 
> Have you experienced a problem on mustang with the current version?

Yes, v10 works fine, but I need invalidation fix for both v11 and v12 to
work on mustang.
I have not tested vanilla v12 on seattle, but v12 ported on rhelsa needs
invalidation fix to work on seattle as well.

~Pratyush
Mark Rutland Dec. 1, 2015, 7:03 p.m. UTC | #5
On Tue, Dec 01, 2015 at 07:46:15AM +0530, Pratyush Anand wrote:
> Hi Geoff,
> 
> On 30/11/2015:10:51:15 AM, Geoff Levand wrote:
> > Hi,
> > 
> > On Fri, 2015-11-27 at 18:43 +0530, Pratyush Anand wrote:
> > 
> > > On 24/11/2015:10:25:34 PM, Geoff Levand wrote:
> > > > +> > 	> > /* Test the entry flags. */
> > > > +.Ltest_source:
> > > > +> > 	> > tbz> > 	> > x18, IND_SOURCE_BIT, .Ltest_indirection
> > > > +
> > > > +> > 	> > mov x20, x13> > 	> > 	> > 	> > 	> > /*  x20 = copy dest */
> > > > +> > 	> > mov x21, x12> > 	> > 	> > 	> > 	> > /*  x21 = copy src */
> > > 
> > > Till v10 we had here invalidation for relocated destination page to PoC. I could
> > > not understand, why it was removed. Removing that piece of code breaks kexec
> > > booting with mustang. I need [1] to kexec boot into second kernel with mustang
> > > platform.
> > 
> > We need to flush the new kernel to PoC.  The code that was here that
> > was doing that would only be executed when the new kernel needed
> > relocation (the standard kexec case).  We also need to flush kernels
> > that do not need relocation (the standard kdump case).
> > 
> > I moved the new kernel flush to kexec_segment_flush(), called
> > unconditionally in machine_kexec() so we can handle both cases
> > with one piece of code.
> 
> Yes, I had noticed that. Actually flushing before cache is disabled can always
> cause heisenbug.

Please use "clean", "invalidate", or "clean+invalidate" rather than
"flush". The latter is ambiguous and misleading.

You can validly perform maintenance while the cache may allocate for a
region of memory, it's just that afterwards the cache may hold clean
entries for that region.

You can clean/clean+invalidate to push data to the PoC, or you can
invalidate/clean+invalidate to ensure that no asynchronous writebacks
occur later (so long as you do not make a cacheable write to said
cacheline).

The only thing that you cannot guarantee is that there is not some clean
cacheline allocated for a region of memory to which cacheable accesses
may be performed.

Note that the kernel only requires its Image to be clean to the PoC. So
long as this is true, we know that there will not be asynchronous
writebacks, and can invalidate as necessary as part of the boot process.

Thanks,
Mark.
Geoff Levand Dec. 2, 2015, 9:08 p.m. UTC | #6
Hi Mark,

On Tue, 2015-12-01 at 19:03 +0000, Mark Rutland wrote:
> You can validly perform maintenance while the cache may allocate for a
> region of memory, it's just that afterwards the cache may hold a clean
> entries for that region.
> 
> You can clean/clean+invalidate to push data to the PoC, or you can
> invalidate/clean+invalidate to ensure that no asynchronous writebacks
> occur later (so long as you do not make a cacheable write to said
> cacheline).
> 
> The only thing that you cannot guarantee is that there is not some clean
> cacheline allocated for a region of memory to which cacheable accesses
> may be performed.
> 
> Note that the kernel only requires its Image to be clean to the PoC. So
> long as this is true, we know that there will not be asynchrnoous
> writebacks, and can invalidate as necessary as part of the boot process.

In v10, which worked for Mustang and Qualcomm, we had:

  clean+invalidate to PoC all source pages
  disable d-cache
  loop {
    invalidate to PoC destination page
    copy page source->destination
  }
  enter new image

In v11 I changed this, and it did not work for those platforms:
 
  clean+invalidate to PoC all source pages
  clean+invalidate to PoC all destination pages
  disable d-cache
  loop {
    copy page source->destination
  }
  enter new image

Based on your comments above I would think both should work OK.

-Geoff
Geoff Levand Dec. 2, 2015, 10:49 p.m. UTC | #7
Azriel and Pratyush,

On Tue, 2015-12-01 at 11:32 -0700, Azriel Samson wrote:
> I tested with v11 on our platform and I too needed the invalidation
> fix.

I pushed out a kexec-v12.1 branch to my linux-kexec repo [1] with
fixes.  Could you test it with both kexec reboot and kdump?  For
kdump, use something like:

  CMDLINE += 'crashkernel=64M@2240M'

then

  # kexec -d --load-panic /boot/vmlinux.strip --append="1 mem=64M maxcpus=1 reset_devices console=... root=..."
  # echo c > /proc/sysrq-trigger

We need to test both reboot and kdump since the way the kexec is
done is different for the two.

[1]  https://git.kernel.org/cgit/linux/kernel/git/geoff/linux-kexec.git
-Geoff
Azriel Samson Dec. 3, 2015, 4:37 a.m. UTC | #8
Hi Geoff,

> I pushed out a kexec-v12.1 branch to my linux-kexec repo [1] with fixes.
Could you test it with both kexec reboot and kdump?  For kdump, use
something like:
 > CMDLINE += 'crashkernel=64M@2240M'

For kdump, I also require a version of the patch "arm64/kexec: Add support
for kexec-lite". Without this, the dump capture kernel cannot find the
device tree even on v11 that I tested.

Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, 
a Linux Foundation Collaborative Project
Pratyush Anand Dec. 3, 2015, 6:09 a.m. UTC | #9
Hi Geoff,

On 02/12/2015:02:49:22 PM, Geoff Levand wrote:
> Azriel and Pratyush,
> 
> On Tue, 2015-12-01 at 11:32 -0700, Azriel Samson wrote:
> > I tested with v11 on our platform and I too needed the invalidation
> > fix.
> 
> I pushed out a kexec-v12.1 branch to my linux-kexec repo [1] with
> fixes.  Could you test it with both kexec reboot and kdump?  For
> kdump, use something like:

I tested kexec-v12.1 with mustang and it works for both kexec and kdump.

There had been a few updates with Akashi, which are not there in kexec-v12.1.
Please take his updates and merge them in v13.

~Pratyush
Mark Rutland Dec. 3, 2015, 4:06 p.m. UTC | #10
On Wed, Dec 02, 2015 at 01:08:33PM -0800, Geoff Levand wrote:
> Hi Mark,
> 
> On Tue, 2015-12-01 at 19:03 +0000, Mark Rutland wrote:
> > You can validly perform maintenance while the cache may allocate for a
> > region of memory, it's just that afterwards the cache may hold a clean
> > entries for that region.
> > 
> > You can clean/clean+invalidate to push data to the PoC, or you can
> > invalidate/clean+invalidate to ensure that no asynchronous writebacks
> > occur later (so long as you do not make a cacheable write to said
> > cacheline).
> > 
> > The only thing that you cannot guarantee is that there is not some clean
> > cacheline allocated for a region of memory to which cacheable accesses
> > may be performed.
> > 
> > Note that the kernel only requires its Image to be clean to the PoC. So
> > long as this is true, we know that there will not be asynchrnoous
> > writebacks, and can invalidate as necessary as part of the boot process.

I've realised that my wording here is somewhat confusing.

When I say "clean to the PoC", I mean that for a given PA the cache can
either:
* Not hold an entry (i.e. be invalid).
* Hold a clean entry, where the data is identical to that at the PoC.

> In v10, which worked for Mustang and Qualcomm, we had:
> 
>   clean+invalidate to PoC all source pages
>   disable d-cache
>   loop {
>     invalidate to PoC destination page
>     copy page source->destination
>   }
>   enter new image
> 
> In v11 I changed this, and it did not work for those platforms:
>  
>   clean+invalidate to PoC all source pages
>   clean+invalidate to PoC all destination pages
>   disable d-cache
>   loop {
>     copy page source->destination
>   }
>   enter new image
> 
> Based on your comments above I would think both should work OK.

No.

In the latter case, clean lines can be allocated before cacheable data
accesses are inhibited. So stale clean lines can shadow the data copied
via non-cacheable accesses.

While the cache is clean, it isn't clean to the PoC.

Thanks,
Mark.
Geoff Levand Dec. 3, 2015, 7:56 p.m. UTC | #11
Hi,

On Wed, 2015-12-02 at 21:37 -0700, Azriel Samson wrote:
> I pushed out a kexec-v12.1 branch to my linux-kexec repo [1] with fixes.
> Could you test it with both kexec reboot and kdump?  For kdump, use
> something like:
>  > CMDLINE += 'crashkernel=64M@2240M'
> 
> For kdump, I also require a version of the patch "arm64/kexec: Add support
> for kexec-lite". Without this, the dump capture kernel cannot find the
> device tree even on v11 that I tested.

Without that patch, does the 2nd stage kernel get the correct
dtb address in register x0?

Is the problem that the dtb data is corrupt when in the 2nd
stage kernel?

Is the dtb data OK when in the 1st stage kernel? 

-Geoff
Azriel Samson Dec. 4, 2015, 12:39 a.m. UTC | #12
Hi,

On 12/3/2015 12:56 PM, Geoff Levand wrote:
> Hi,
>
> On Wed, 2015-12-02 at 21:37 -0700, Azriel Samson wrote:
>> For kdump, I also require a version of the patch "arm64/kexec: Add support
>> for kexec-lite". Without this, the dump capture kernel cannot find the
>> device tree even on v11 that I tested.

This was a setup issue on my side. kdump with your v11 patches worked 
fine without the above patch when I used your version of kexec-tools.

In kexec-tools, I had to skip check_cpu_nodes() since we are using ACPI 
and the device tree does not contain cpu_info.

>
> Without that patch, does the 2nd stage kernel get the correct
> dtb address in register x0?
>
> Is the problem that the dtb data is corrupt when in the 2nd
> stage kernel?
>
> Is the dtb data OK when in the 1st stage kernel?
>
> -Geoff
>

Thanks,
Azriel Samson
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
Pratyush Anand Dec. 4, 2015, 3:54 a.m. UTC | #13
Hi Geoff,

On 03/12/2015:05:39:12 PM, Azriel Samson wrote:
> This was a setup issue on my side. kdump with your v11 patches worked fine
> without the above patch when I used your version of kexec-tools.
> 
> In kexec-tools, I had to skip check_cpu_nodes() since we are using ACPI and
> the device tree does not contain cpu_info.

I think, something like following should be squashed into patch "arm64: Add arm64
kexec support".
https://github.com/pratyushanand/kexec-tools/commit/85f403cabb9ae0d7a37d1656a00fb609a9525086

~Pratyush
Geoff Levand Dec. 7, 2015, 6:47 p.m. UTC | #14
On Fri, 2015-12-04 at 09:24 +0530, Pratyush Anand wrote:
> Hi Geoff,
> 
> On 03/12/2015:05:39:12 PM, Azriel Samson wrote:
> > This was a setup issue on my side. kdump with your v11 patches worked fine
> > without the above patch when I used your version of kexec-tools.
> > 
> > In kexec-tools, I had to skip check_cpu_nodes() since we are using ACPI and
> > the device tree does not contain cpu_info.
> 
> I think, something like following should be squashed into patch "arm64: Add arm64
> kexec support".
> https://github.com/pratyushanand/kexec-tools/commit/85f403cabb9ae0d7a37d1656a00fb609a9525086

I think it better to test for ACPI and then run
any meaningful checks than to just ignore the
check_cpu_nodes failure.

-Geoff
Will Deacon Dec. 15, 2015, 6:29 p.m. UTC | #15
On Tue, Nov 24, 2015 at 10:25:34PM +0000, Geoff Levand wrote:
> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to the
> arm64 architecture that add support for the kexec re-boot mechanism
> (CONFIG_KEXEC) on arm64 platforms.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  arch/arm64/Kconfig                  |  10 +++
>  arch/arm64/include/asm/kexec.h      |  48 ++++++++++++
>  arch/arm64/kernel/Makefile          |   2 +
>  arch/arm64/kernel/machine_kexec.c   | 152 ++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/relocate_kernel.S | 131 +++++++++++++++++++++++++++++++
>  include/uapi/linux/kexec.h          |   1 +
>  6 files changed, 344 insertions(+)
>  create mode 100644 arch/arm64/include/asm/kexec.h
>  create mode 100644 arch/arm64/kernel/machine_kexec.c
>  create mode 100644 arch/arm64/kernel/relocate_kernel.S
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 9ac16a4..c23fd77 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -535,6 +535,16 @@ config SECCOMP
>  	  and the task is only allowed to execute a few safe syscalls
>  	  defined by each seccomp mode.
>  
> +config KEXEC
> +	depends on PM_SLEEP_SMP
> +	select KEXEC_CORE
> +	bool "kexec system call"
> +	---help---
> +	  kexec is a system call that implements the ability to shutdown your
> +	  current kernel, and to start another kernel.  It is like a reboot
> +	  but it is independent of the system firmware.   And like a reboot
> +	  you can start any kernel with it, not just Linux.
> +
>  config XEN_DOM0
>  	def_bool y
>  	depends on XEN
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> new file mode 100644
> index 0000000..46d63cd
> --- /dev/null
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -0,0 +1,48 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#if !defined(_ARM64_KEXEC_H)
> +#define _ARM64_KEXEC_H

Please keep to the style used elsewhere in the arch headers.

> +
> +/* Maximum physical address we can use pages from */
> +
> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
> +
> +/* Maximum address we can reach in physical address mode */
> +
> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
> +
> +/* Maximum address we can use for the control code buffer */
> +
> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
> +
> +#define KEXEC_CONTROL_PAGE_SIZE	4096

Does this work on kernels configured with 64k pages? It looks like the
kexec core code will end up using order-0 pages, so I worry that we'll
actually put down 64k and potentially confuse a 4k crash kernel, for
example.

> +#define KEXEC_ARCH KEXEC_ARCH_ARM64
> +
> +#if !defined(__ASSEMBLY__)

#ifndef

> diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
> new file mode 100644
> index 0000000..8b990b8
> --- /dev/null
> +++ b/arch/arm64/kernel/machine_kexec.c
> @@ -0,0 +1,152 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +#include <linux/of_fdt.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/cacheflush.h>
> +#include <asm/system_misc.h>
> +
> +#include "cpu-reset.h"
> +
> +/* Global variables for the arm64_relocate_new_kernel routine. */
> +extern const unsigned char arm64_relocate_new_kernel[];
> +extern const unsigned long arm64_relocate_new_kernel_size;
> +
> +static unsigned long kimage_start;
> +
> +void machine_kexec_cleanup(struct kimage *kimage)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> +
> +/**
> + * machine_kexec_prepare - Prepare for a kexec reboot.
> + *
> + * Called from the core kexec code when a kernel image is loaded.
> + */
> +int machine_kexec_prepare(struct kimage *kimage)
> +{
> +	kimage_start = kimage->start;
> +	return 0;
> +}
> +
> +/**
> + * kexec_list_flush - Helper to flush the kimage list to PoC.
> + */
> +static void kexec_list_flush(unsigned long kimage_head)
> +{
> +	unsigned long *entry;
> +
> +	for (entry = &kimage_head; ; entry++) {
> +		unsigned int flag = *entry & IND_FLAGS;
> +		void *addr = phys_to_virt(*entry & PAGE_MASK);
> +
> +		switch (flag) {
> +		case IND_INDIRECTION:
> +			entry = (unsigned long *)addr - 1;
> +			__flush_dcache_area(addr, PAGE_SIZE);
> +			break;
> +		case IND_DESTINATION:
> +			break;
> +		case IND_SOURCE:
> +			__flush_dcache_area(addr, PAGE_SIZE);
> +			break;
> +		case IND_DONE:
> +			return;
> +		default:
> +			BUG();
> +		}
> +	}
> +}
> +
> +/**
> + * kexec_segment_flush - Helper to flush the kimage segments to PoC.
> + */
> +static void kexec_segment_flush(const struct kimage *kimage)
> +{
> +	unsigned long i;
> +
> +	pr_devel("%s:\n", __func__);
> +
> +	for (i = 0; i < kimage->nr_segments; i++) {
> +		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
> +			i,
> +			kimage->segment[i].mem,
> +			kimage->segment[i].mem + kimage->segment[i].memsz,
> +			kimage->segment[i].memsz,
> +			kimage->segment[i].memsz /  PAGE_SIZE);
> +
> +		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
> +			kimage->segment[i].memsz);
> +	}
> +}
> +
> +/**
> + * machine_kexec - Do the kexec reboot.
> + *
> + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> + */
> +void machine_kexec(struct kimage *kimage)
> +{
> +	phys_addr_t reboot_code_buffer_phys;
> +	void *reboot_code_buffer;
> +
> +	BUG_ON(num_online_cpus() > 1);
> +
> +	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
> +	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> +
> +	/*
> +	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
> +	 * after the kernel is shut down.
> +	 */
> +	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
> +		arm64_relocate_new_kernel_size);

At which point does the I-cache get invalidated for this?

> +
> +	/* Flush the reboot_code_buffer in preparation for its execution. */
> +	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
> +
> +	/* Flush the new image. */
> +	kexec_segment_flush(kimage);
> +
> +	/* Flush the kimage list. */
> +	kexec_list_flush(kimage->head);
> +
> +	pr_info("Bye!\n");
> +
> +	/* Disable all DAIF exceptions. */
> +	asm volatile ("msr daifset, #0xf" : : : "memory");

Can we not use our helpers for this?

> +
> +	setup_mm_for_reboot();
> +
> +	/*
> +	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
> +	 * transfer control to the reboot_code_buffer which contains a copy of
> +	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
> +	 * uses physical addressing to relocate the new image to its final
> +	 * position and transfers control to the image entry point when the
> +	 * relocation is complete.
> +	 */
> +
> +	cpu_soft_restart(virt_to_phys(cpu_reset),
> +		is_hyp_mode_available(),
> +		reboot_code_buffer_phys, kimage->head, kimage_start);
> +
> +	BUG(); /* Should never get here. */
> +}
> +
> +void machine_crash_shutdown(struct pt_regs *regs)
> +{
> +	/* Empty routine needed to avoid build errors. */
> +}
> diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
> new file mode 100644
> index 0000000..71cab0e
> --- /dev/null
> +++ b/arch/arm64/kernel/relocate_kernel.S
> @@ -0,0 +1,131 @@
> +/*
> + * kexec for arm64
> + *
> + * Copyright (C) Linaro.
> + * Copyright (C) Huawei Futurewei Technologies.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kexec.h>
> +
> +#include <asm/assembler.h>
> +#include <asm/kexec.h>
> +#include <asm/kvm_arm.h>
> +#include <asm/page.h>
> +
> +/*
> + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
> + *
> + * The memory that the old kernel occupies may be overwritten when coping the
> + * new image to its final location.  To assure that the
> + * arm64_relocate_new_kernel routine which does that copy is not overwritten,
> + * all code and data needed by arm64_relocate_new_kernel must be between the
> + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
> + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
> + * control_code_page, a special page which has been set up to be preserved
> + * during the copy operation.
> + */
> +.globl arm64_relocate_new_kernel
> +arm64_relocate_new_kernel:
> +
> +	/* Setup the list loop variables. */
> +	mov	x18, x0				/* x18 = kimage_head */
> +	mov	x17, x1				/* x17 = kimage_start */
> +	dcache_line_size x16, x0		/* x16 = dcache line size */

Why is this needed?

> +	mov	x15, xzr			/* x15 = segment start */
> +	mov	x14, xzr			/* x14 = entry ptr */
> +	mov	x13, xzr			/* x13 = copy dest */
> +
> +	/* Clear the sctlr_el2 flags. */
> +	mrs	x0, CurrentEL
> +	cmp	x0, #CurrentEL_EL2
> +	b.ne	1f
> +	mrs	x0, sctlr_el2
> +	ldr	x1, =SCTLR_EL2_FLAGS

If we're using literal pools, we probably want a .ltorg directive somewhere.

> +	bic	x0, x0, x1
> +	msr	sctlr_el2, x0
> +	isb
> +1:
> +
> +	/* Check if the new image needs relocation. */
> +	cbz	x18, .Ldone
> +	tbnz	x18, IND_DONE_BIT, .Ldone
> +
> +.Lloop:
> +	and	x12, x18, PAGE_MASK		/* x12 = addr */
> +
> +	/* Test the entry flags. */
> +.Ltest_source:
> +	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
> +
> +	mov x20, x13				/*  x20 = copy dest */
> +	mov x21, x12				/*  x21 = copy src */

Weird indentation.

> +	/* Copy page. */
> +1:	ldp	x22, x23, [x21]
> +	ldp	x24, x25, [x21, #16]
> +	ldp	x26, x27, [x21, #32]
> +	ldp	x28, x29, [x21, #48]
> +	add	x21, x21, #64
> +	stnp	x22, x23, [x20]
> +	stnp	x24, x25, [x20, #16]
> +	stnp	x26, x27, [x20, #32]
> +	stnp	x28, x29, [x20, #48]
> +	add	x20, x20, #64
> +	tst	x21, #(PAGE_SIZE - 1)
> +	b.ne	1b

We should macroise this, to save on duplication of a common routine.
You also need to address the caching issues that Mark raised separately.

> +	/* dest += PAGE_SIZE */
> +	add	x13, x13, PAGE_SIZE
> +	b	.Lnext
> +
> +.Ltest_indirection:
> +	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
> +
> +	/* ptr = addr */
> +	mov	x14, x12
> +	b	.Lnext
> +
> +.Ltest_destination:
> +	tbz	x18, IND_DESTINATION_BIT, .Lnext
> +
> +	mov	x15, x12
> +
> +	/* dest = addr */
> +	mov	x13, x12
> +
> +.Lnext:
> +	/* entry = *ptr++ */
> +	ldr	x18, [x14], #8
> +
> +	/* while (!(entry & DONE)) */
> +	tbz	x18, IND_DONE_BIT, .Lloop
> +
> +.Ldone:
> +	dsb	sy
> +	ic	ialluis

I don't think this needs to be inner-shareable, and these dsbs can probably
be non-shareable too.

> +	dsb	sy
> +	isb
> +
> +	/* Start new image. */
> +	mov	x0, xzr
> +	mov	x1, xzr
> +	mov	x2, xzr
> +	mov	x3, xzr
> +	br	x17
> +
> +.align 3	/* To keep the 64-bit values below naturally aligned. */
> +
> +.Lcopy_end:
> +.org	KEXEC_CONTROL_PAGE_SIZE
> +
> +/*
> + * arm64_relocate_new_kernel_size - Number of bytes to copy to the
> + * control_code_page.
> + */
> +.globl arm64_relocate_new_kernel_size
> +arm64_relocate_new_kernel_size:
> +	.quad	.Lcopy_end - arm64_relocate_new_kernel
> diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
> index 99048e5..ccec467 100644
> --- a/include/uapi/linux/kexec.h
> +++ b/include/uapi/linux/kexec.h
> @@ -39,6 +39,7 @@
>  #define KEXEC_ARCH_SH      (42 << 16)
>  #define KEXEC_ARCH_MIPS_LE (10 << 16)
>  #define KEXEC_ARCH_MIPS    ( 8 << 16)
> +#define KEXEC_ARCH_ARM64   (183 << 16)

This should probably be called KEXEC_ARCH_AARCH64 for consistency with
the ELF machine name.

Will
Geoff Levand Dec. 16, 2015, 12:14 a.m. UTC | #16
Hi Will,

I'll post a v12.4 of this patch that addresses your comments.

On Tue, 2015-12-15 at 18:29 +0000, Will Deacon wrote:
> +#if !defined(_ARM64_KEXEC_H)
> > +#define _ARM64_KEXEC_H
> 
> Please keep to the style used elsewhere in the arch headers.

OK.

> > +
> > +#define KEXEC_CONTROL_PAGE_SIZE> > 	> > 4096
> 
> Does this work on kernels configured with 64k pages? It looks like the
> kexec core code will end up using order-0 pages, so I worry that we'll
> actually put down 64k and potentially confuse a 4k crash kernel, for
> example.

KEXEC_CONTROL_PAGE_SIZE just tells the core kexec code how big
we need control_code_buffer to be.  That buffer is only used by
the arch code of the first stage kernel.  With 64k pages the buffer
will be a full page, but we'll only use the first 4k of it. 

> > +#define KEXEC_ARCH KEXEC_ARCH_ARM64
> > +
> > +#if !defined(__ASSEMBLY__)
> 
> #ifndef

OK.

> > + * machine_kexec - Do the kexec reboot.
> > + *
> > + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
> > + */
> > +void machine_kexec(struct kimage *kimage)
> > +{
> > +> > 	> > phys_addr_t reboot_code_buffer_phys;
> > +> > 	> > void *reboot_code_buffer;
> > +
> > +> > 	> > BUG_ON(num_online_cpus() > 1);
> > +
> > +> > 	> > reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
> > +> > 	> > reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
> > +
> > +> > 	> > /*
> > +> > 	> >  * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
> > +> > 	> >  * after the kernel is shut down.
> > +> > 	> >  */
> > +> > 	> > memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
> > +> > 	> > 	> > arm64_relocate_new_kernel_size);
> 
> At which point does the I-cache get invalidated for this?

I'll add a call to flush_icache_range() for reboot_code_buffer.  I
think that should do it.

> > +
> > +> > 	> > /* Flush the reboot_code_buffer in preparation for its execution. */
> > +> > 	> > __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
> > +
> > +> > 	> > /* Flush the new image. */
> > +> > 	> > kexec_segment_flush(kimage);
> > +
> > +> > 	> > /* Flush the kimage list. */
> > +> > 	> > kexec_list_flush(kimage->head);
> > +
> > +> > 	> > pr_info("Bye!\n");
> > +
> > +> > 	> > /* Disable all DAIF exceptions. */
> > +> > 	> > asm volatile ("msr > > daifset> > , #0xf" : : :
> > "memory");
> 
> Can we not use our helpers for this?

Mark Rutland had commented that calling daifset four times
through the different macros took considerable time, and
recommended a single call here.

Would you prefer a new macro for irqflags.h, maybe

  #define local_daif_disable() asm("msr daifset, #0xf" : : : "memory")?

> > +/*
> > + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
> > + *
> > + * The memory that the old kernel occupies may be overwritten when coping the
> > + * new image to its final location.  To assure that the
> > + * arm64_relocate_new_kernel routine which does that copy is not overwritten,
> > + * all code and data needed by arm64_relocate_new_kernel must be between the
> > + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
> > + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
> > + * control_code_page, a special page which has been set up to be preserved
> > + * during the copy operation.
> > + */
> > +.globl arm64_relocate_new_kernel
> > +arm64_relocate_new_kernel:
> > +
> > +> > 	> > /* Setup the list loop variables. */
> > +> > 	> > mov> > 	> > x18, x0> > 	> > 	> > 	> > 	> > /* x18 = kimage_head */
> > +> > 	> > mov> > 	> > x17, x1> > 	> > 	> > 	> > 	> > /* x17 = kimage_start */
> > +> > 	> > dcache_line_size x16, x0> > 	> > 	> > /* x16 = dcache line size */
> 
> Why is this needed?

This was left over from the previous version where we
invalidated pages while copying them.  I've since added
that invalidate back, so this is again needed.

> 
> > +> > 	> > mov> > 	> > x15, xzr> > 	> > 	> > 	> > /* x15 = segment start */
> > +> > 	> > mov> > 	> > x14, xzr> > 	> > 	> > 	> > /* x14 = entry ptr */
> > +> > 	> > mov> > 	> > x13, xzr> > 	> > 	> > 	> > /* x13 = copy dest */
> > +
> > +> > 	> > /* Clear the sctlr_el2 flags. */
> > +> > 	> > mrs> > 	> > x0, CurrentEL
> > +> > 	> > cmp> > 	> > x0, #CurrentEL_EL2
> > +> > 	> > b.ne> > 	> > 1f
> > +> > 	> > mrs> > 	> > x0, sctlr_el2
> > +> > 	> > ldr> > 	> > x1, =SCTLR_EL2_FLAGS
> 
> If we're using literal pools, we probably want a .ltorg directive somewhere.

I've added one in at the end of the arm64_relocate_new_kernel
code.

> > +> > 	> > bic> > 	> > x0, x0, x1
> > +> > 	> > msr> > 	> > sctlr_el2, x0
> > +> > 	> > isb
> > +1:
> > +
> > +> > 	> > /* Check if the new image needs relocation. */
> > +> > 	> > cbz> > 	> > x18, .Ldone
> > +> > 	> > tbnz> > 	> > x18, IND_DONE_BIT, .Ldone
> > +
> > +.Lloop:
> > +> > 	> > and> > 	> > x12, x18, PAGE_MASK> > 	> > 	> > /* x12 = addr */
> > +
> > +> > 	> > /* Test the entry flags. */
> > +.Ltest_source:
> > +> > 	> > tbz> > 	> > x18, IND_SOURCE_BIT, .Ltest_indirection
> > +
> > +> > 	> > mov x20, x13> > 	> > 	> > 	> > 	> > /*  x20 = copy dest */
> > +> > 	> > mov x21, x12> > 	> > 	> > 	> > 	> > /*  x21 = copy src */
> 
> Weird indentation.

Fixed.

> > +> > 	> > /* Copy page. */
> > +1:> > 	> > ldp> > 	> > x22, x23, [x21]
> > +> > 	> > ldp> > 	> > x24, x25, [x21, #16]
> > +> > 	> > ldp> > 	> > x26, x27, [x21, #32]
> > +> > 	> > ldp> > 	> > x28, x29, [x21, #48]
> > +> > 	> > add> > 	> > x21, x21, #64
> > +> > 	> > stnp> > 	> > x22, x23, [x20]
> > +> > 	> > stnp> > 	> > x24, x25, [x20, #16]
> > +> > 	> > stnp> > 	> > x26, x27, [x20, #32]
> > +> > 	> > stnp> > 	> > x28, x29, [x20, #48]
> > +> > 	> > add> > 	> > x20, x20, #64
> > +> > 	> > tst> > 	> > x21, #(PAGE_SIZE - 1)
> > +> > 	> > b.ne> > 	> > 1b
> 
> We should macroise this, to save on duplication of a common routine.

So something like this in assembler.h?

+/*
+ * copy_page - copy src to dest using temp registers t1-t8
+ */
+	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
+1:	ldp	\t1, \t2, [\src]
+	ldp	\t3, \t4, [\src, #16]
+	ldp	\t5, \t6, [\src, #32]
+	ldp	\t7, \t8, [\src, #48]
+	add	\src, \src, #64
+	stnp	\t1, \t2, [\dest]
+	stnp	\t3, \t4, [\dest, #16]
+	stnp	\t5, \t6, [\dest, #32]
+	stnp	\t7, \t8, [\dest, #48]
+	add	\dest, \dest, #64
+	tst	\src, #(PAGE_SIZE - 1)
+	b.ne	1b
+	.endm

> You also need to address the caching issues that Mark raised separately.

Cache maintenance has been fixed (reintroduced) in the current code.

> 
> > +> > 	> > /* dest += PAGE_SIZE */
> > +> > 	> > add> > 	> > x13, x13, PAGE_SIZE
> > +> > 	> > b> > 	> > .Lnext
> > +
> > +.Ltest_indirection:
> > +> > 	> > tbz> > 	> > x18, IND_INDIRECTION_BIT, .Ltest_destination
> > +
> > +> > 	> > /* ptr = addr */
> > +> > 	> > mov> > 	> > x14, x12
> > +> > 	> > b> > 	> > .Lnext
> > +
> > +.Ltest_destination:
> > +> > 	> > tbz> > 	> > x18, IND_DESTINATION_BIT, .Lnext
> > +
> > +> > 	> > mov> > 	> > x15, x12
> > +
> > +> > 	> > /* dest = addr */
> > +> > 	> > mov> > 	> > x13, x12
> > +
> > +.Lnext:
> > +> > 	> > /* entry = *ptr++ */
> > +> > 	> > ldr> > 	> > x18, [x14], #8
> > +
> > +> > 	> > /* while (!(entry & DONE)) */
> > +> > 	> > tbz> > 	> > x18, IND_DONE_BIT, .Lloop
> > +
> > +.Ldone:
> > +> > 	> > dsb> > 	> > sy
> > +> > 	> > ic> > 	> > ialluis
> 
> I don't think this needs to be inner-shareable, and these dsbs can probably
> be non-shareable too.

OK.

> > +> > 	> > dsb> > 	> > sy
> > +> > 	> > isb
> > +
> > +> > 	> > /* Start new image. */
> > +> > 	> > mov> > 	> > x0, xzr
> > +> > 	> > mov> > 	> > x1, xzr
> > +> > 	> > mov> > 	> > x2, xzr
> > +> > 	> > mov> > 	> > x3, xzr
> > +> > 	> > br> > 	> > x17
> > +
> > +.align 3> > 	> > /* To keep the 64-bit values below naturally aligned. */
> > +
> > +.Lcopy_end:
> > +.org> > 	> > KEXEC_CONTROL_PAGE_SIZE
> > +
> > +/*
> > + * arm64_relocate_new_kernel_size - Number of bytes to copy to the
> > + * control_code_page.
> > + */
> > +.globl arm64_relocate_new_kernel_size
> > +arm64_relocate_new_kernel_size:
> > +> > 	> > .quad> > 	> > .Lcopy_end - arm64_relocate_new_kernel
> > diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
> > index 99048e5..ccec467 100644
> > --- a/include/uapi/linux/kexec.h
> > +++ b/include/uapi/linux/kexec.h
> > @@ -39,6 +39,7 @@
> >  #define KEXEC_ARCH_SH      (42 << 16)
> >  #define KEXEC_ARCH_MIPS_LE (10 << 16)
> >  #define KEXEC_ARCH_MIPS    ( 8 << 16)
> > +#define KEXEC_ARCH_ARM64   (183 << 16)
> 
> This should probably be called KEXEC_ARCH_AARCH64 for consistency with
> the ELF machine name.

OK.

-Geoff
Pratyush Anand Dec. 16, 2015, 7:18 a.m. UTC | #17
On 15/12/2015:04:14:30 PM, Geoff Levand wrote:
> On Tue, 2015-12-15 at 18:29 +0000, Will Deacon wrote:
> > > +> > 	> >  * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
> > > +> > 	> >  * after the kernel is shut down.
> > > +> > 	> >  */
> > > +> > 	> > memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
> > > +> > 	> > 	> > arm64_relocate_new_kernel_size);
> > 
> > At which point does the I-cache get invalidated for this?
> 
> I'll add a call to flush_icache_range() for reboot_code_buffer.  I
> think that should do it.

We execute arm64_relocate_new_kernel() code with I-cache disabled. So, do we
really need to invalidate I-cache?

~Pratyush
James Morse Dec. 16, 2015, 9:30 a.m. UTC | #18
On 16/12/15 07:18, Pratyush Anand wrote:
> On 15/12/2015:04:14:30 PM, Geoff Levand wrote:
>> On Tue, 2015-12-15 at 18:29 +0000, Will Deacon wrote:
>>>> +> > 	> >  * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
>>>> +> > 	> >  * after the kernel is shut down.
>>>> +> > 	> >  */
>>>> +> > 	> > memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
>>>> +> > 	> > 	> > arm64_relocate_new_kernel_size);
>>>
>>> At which point does the I-cache get invalidated for this?
>>
>> I'll add a call to flush_icache_range() for reboot_code_buffer.  I
>> think that should do it.
> 
> We execute arm64_relocate_new_kernel() code with I-cache disabled. So, do we
> really need to invalidate I-cache?

I got bitten by this, see Mark's earlier reply[0]:

Mark Rutland wrote:
> The SCTLR_ELx.I only affects the attributes that the I-cache uses to
> fetch with, not whether it is enabled (it cannot be disabled
> architecturally).



James


[0]
http://lists.infradead.org/pipermail/linux-arm-kernel/2015-October/382101.html
Pratyush Anand Dec. 16, 2015, 10:32 a.m. UTC | #19
On 16/12/2015:09:30:34 AM, James Morse wrote:
> On 16/12/15 07:18, Pratyush Anand wrote:
> > We execute arm64_relocate_new_kernel() code with I-cache disabled. So, do we
> > really need to invalidate I-cache?
> 
> I got bitten by this, see Mark's earlier reply[0]:
> 
> Mark Rutland wrote:
> > The SCTLR_ELx.I only affects the attributes that the I-cache uses to
> > fetch with, not whether it is enabled (it cannot be disabled
> > architecturally).

Thanks James for pointing to it. I had missed that.

~Pratyush
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ac16a4..c23fd77 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -535,6 +535,16 @@  config SECCOMP
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
+config KEXEC
+	depends on PM_SLEEP_SMP
+	select KEXEC_CORE
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..46d63cd
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,48 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#if !defined(_ARM64_KEXEC_H)
+#define _ARM64_KEXEC_H
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#if !defined(__ASSEMBLY__)
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+#endif /* !defined(__ASSEMBLY__) */
+
+#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f..f68420d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,8 @@  arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o	\
+					   cpu-reset.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..8b990b8
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,152 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/system_misc.h>
+
+#include "cpu-reset.h"
+
+/* Global variables for the arm64_relocate_new_kernel routine. */
+extern const unsigned char arm64_relocate_new_kernel[];
+extern const unsigned long arm64_relocate_new_kernel_size;
+
+static unsigned long kimage_start;
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+	/* Empty routine needed to avoid build errors. */
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ */
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	kimage_start = kimage->start;
+	return 0;
+}
+
+/**
+ * kexec_list_flush - Helper to flush the kimage list to PoC.
+ */
+static void kexec_list_flush(unsigned long kimage_head)
+{
+	unsigned long *entry;
+
+	for (entry = &kimage_head; ; entry++) {
+		unsigned int flag = *entry & IND_FLAGS;
+		void *addr = phys_to_virt(*entry & PAGE_MASK);
+
+		switch (flag) {
+		case IND_INDIRECTION:
+			entry = (unsigned long *)addr - 1;
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DESTINATION:
+			break;
+		case IND_SOURCE:
+			__flush_dcache_area(addr, PAGE_SIZE);
+			break;
+		case IND_DONE:
+			return;
+		default:
+			BUG();
+		}
+	}
+}
+
+/**
+ * kexec_segment_flush - Helper to flush the kimage segments to PoC.
+ */
+static void kexec_segment_flush(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_devel("%s:\n", __func__);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_devel("  segment[%lu]: %016lx - %016lx, %lx bytes, %lu pages\n",
+			i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz,
+			kimage->segment[i].memsz,
+			kimage->segment[i].memsz /  PAGE_SIZE);
+
+		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
+			kimage->segment[i].memsz);
+	}
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+void machine_kexec(struct kimage *kimage)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+
+	BUG_ON(num_online_cpus() > 1);
+
+	reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	/*
+	 * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+	memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
+		arm64_relocate_new_kernel_size);
+
+	/* Flush the reboot_code_buffer in preparation for its execution. */
+	__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
+
+	/* Flush the new image. */
+	kexec_segment_flush(kimage);
+
+	/* Flush the kimage list. */
+	kexec_list_flush(kimage->head);
+
+	pr_info("Bye!\n");
+
+	/* Disable all DAIF exceptions. */
+	asm volatile ("msr daifset, #0xf" : : : "memory");
+
+	setup_mm_for_reboot();
+
+	/*
+	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
+	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
+	 * uses physical addressing to relocate the new image to its final
+	 * position and transfers control to the image entry point when the
+	 * relocation is complete.
+	 */
+
+	cpu_soft_restart(virt_to_phys(cpu_reset),
+		is_hyp_mode_available(),
+		reboot_code_buffer_phys, kimage->head, kimage_start);
+
+	BUG(); /* Should never get here. */
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	/* Empty routine needed to avoid build errors. */
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..71cab0e
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,131 @@ 
+/*
+ * kexec for arm64
+ *
+ * Copyright (C) Linaro.
+ * Copyright (C) Huawei Futurewei Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/kvm_arm.h>
+#include <asm/page.h>
+
+/*
+ * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new image to its final location.  To ensure that the
+ * arm64_relocate_new_kernel routine which does that copy is not overwritten,
+ * all code and data needed by arm64_relocate_new_kernel must be between the
+ * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end.  The
+ * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
+ * control_code_page, a special page which has been set up to be preserved
+ * during the copy operation.
+ */
+.globl arm64_relocate_new_kernel
+arm64_relocate_new_kernel:
+
+	/* Setup the list loop variables. */
+	mov	x18, x0				/* x18 = kimage_head */
+	mov	x17, x1				/* x17 = kimage_start */
+	dcache_line_size x16, x0		/* x16 = dcache line size */
+	mov	x15, xzr			/* x15 = segment start */
+	mov	x14, xzr			/* x14 = entry ptr */
+	mov	x13, xzr			/* x13 = copy dest */
+
+	/* Clear the sctlr_el2 flags. */
+	mrs	x0, CurrentEL
+	cmp	x0, #CurrentEL_EL2
+	b.ne	1f
+	mrs	x0, sctlr_el2
+	ldr	x1, =SCTLR_EL2_FLAGS
+	bic	x0, x0, x1
+	msr	sctlr_el2, x0
+	isb
+1:
+
+	/* Check if the new image needs relocation. */
+	cbz	x18, .Ldone
+	tbnz	x18, IND_DONE_BIT, .Ldone
+
+.Lloop:
+	and	x12, x18, PAGE_MASK		/* x12 = addr */
+
+	/* Test the entry flags. */
+.Ltest_source:
+	tbz	x18, IND_SOURCE_BIT, .Ltest_indirection
+
+	mov x20, x13				/*  x20 = copy dest */
+	mov x21, x12				/*  x21 = copy src */
+
+	/* Copy page. */
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+	add	x13, x13, PAGE_SIZE
+	b	.Lnext
+
+.Ltest_indirection:
+	tbz	x18, IND_INDIRECTION_BIT, .Ltest_destination
+
+	/* ptr = addr */
+	mov	x14, x12
+	b	.Lnext
+
+.Ltest_destination:
+	tbz	x18, IND_DESTINATION_BIT, .Lnext
+
+	mov	x15, x12
+
+	/* dest = addr */
+	mov	x13, x12
+
+.Lnext:
+	/* entry = *ptr++ */
+	ldr	x18, [x14], #8
+
+	/* while (!(entry & DONE)) */
+	tbz	x18, IND_DONE_BIT, .Lloop
+
+.Ldone:
+	dsb	sy
+	ic	ialluis
+	dsb	sy
+	isb
+
+	/* Start new image. */
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+	br	x17
+
+.align 3	/* To keep the 64-bit values below naturally aligned. */
+
+.Lcopy_end:
+.org	KEXEC_CONTROL_PAGE_SIZE
+
+/*
+ * arm64_relocate_new_kernel_size - Number of bytes to copy to the
+ * control_code_page.
+ */
+.globl arm64_relocate_new_kernel_size
+arm64_relocate_new_kernel_size:
+	.quad	.Lcopy_end - arm64_relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 99048e5..ccec467 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -39,6 +39,7 @@ 
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16