diff mbox

[v2,02/31] arm64: Kernel booting and initialisation

Message ID 1344966752-16102-3-git-send-email-catalin.marinas@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Catalin Marinas Aug. 14, 2012, 5:52 p.m. UTC
The patch adds the kernel booting and the initial setup code.
Documentation/arm64/booting.txt describes the booting protocol on the
AArch64 Linux kernel. This is subject to change following the work on
boot standardisation, ACPI.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/booting.txt |  141 +++++++++++
 arch/arm64/include/asm/setup.h  |   26 ++
 arch/arm64/kernel/head.S        |  521 +++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c       |  357 +++++++++++++++++++++++++++
 4 files changed, 1045 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/arm64/booting.txt
 create mode 100644 arch/arm64/include/asm/setup.h
 create mode 100644 arch/arm64/kernel/head.S
 create mode 100644 arch/arm64/kernel/setup.c

Comments

Olof Johansson Aug. 14, 2012, 11:06 p.m. UTC | #1
Hi,


On Tue, Aug 14, 2012 at 06:52:03PM +0100, Catalin Marinas wrote:

> +Before jumping into the kernel, the following conditions must be met:
> +
> +- Quiesce all DMA capable devices so that memory does not get
> +  corrupted by bogus network packets or disk data.  This will save
> +  you many hours of debug.
> +
> +- Primary CPU general-purpose register settings
> +  x0 = physical address of device tree blob (dtb) in system RAM.
> +
> +- CPU mode
> +  All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
> +  IRQ and FIQ).
> +  The CPU must be in either EL2 (RECOMMENDED in order to have access to
> +  the virtualisation extensions) or non-secure EL1.
> +
> +- Caches, MMUs
> +  The MMU must be off.
> +  Instruction cache may be on or off.
> +  Data cache must be off and invalidated.
> +
> +- Architected timers
> +  CNTFRQ must be programmed with the timer frequency.
> +  If entering the kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0)
> +  set where available.
> +
> +- Coherency
> +  All CPUs to be booted by the kernel must be part of the same coherency
> +  domain on entry to the kernel.  This may require IMPLEMENTATION DEFINED
> +  initialisation to enable the receiving of maintenance operations on
> +  each CPU.
> +
> +- System registers
> +  All writable architected system registers at the exception level where
> +  the kernel image will be entered must be initialised by software at a
> +  higher exception level to prevent execution in an UNKNOWN state.

Given the recent development of ARM platforms, you might want to mandate
the state of IOMMUs as well (they should probably be off, since there
should be no active DMA activity). Graphics would be the exception to
this, since if you want to keep scanning out a splash screen, you'll
have to keep doing DMA...

> +- The primary CPU must jump directly to the first instruction of the
> +  kernel image.  The device tree blob passed by this CPU must contain
> +  for each CPU node:
> +
> +    1. An 'enable-method' property. Currently, the only supported value
> +       for this field is the string "spin-table".
> +
> +    2. A 'cpu-release-addr' property identifying a 64-bit,
> +       zero-initialised memory location.

These would be good to have documented in the
Documentation/devicetree/bindings hierarchy as well.

> index 0000000..d766493
> --- /dev/null
> +++ b/arch/arm64/include/asm/setup.h
> @@ -0,0 +1,26 @@
> +/*
> + * Based on arch/arm/include/asm/setup.h
> + *
> + * Copyright (C) 1997-1999 Russell King
> + * Copyright (C) 2012 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef __ASM_SETUP_H
> +#define __ASM_SETUP_H
> +
> +#include <linux/types.h>
> +
> +#define COMMAND_LINE_SIZE 1024

Probably not a huge deal, and other architectures seem to be all over
the map on this, but you might want to go with a larger value now rather
than later. 2048 or 4096 perhaps?

> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> new file mode 100644
> index 0000000..34ccdc0
> --- /dev/null
> +++ b/arch/arm64/kernel/head.S

[...]

> +/*
> + * Setup common bits before finally enabling the MMU. Essentially this is just
> + * loading the page table pointer and vector base registers.
> + *
> + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
> + * the MMU.
> + */
> +__enable_mmu:

ENTRY()?

> +	ldr	x5, =vectors
> +	msr	vbar_el1, x5
> +	msr	ttbr0_el1, x25			// load TTBR0
> +	msr	ttbr1_el1, x26			// load TTBR1
> +	isb
> +	b	__turn_mmu_on
> +ENDPROC(__enable_mmu)

...or just END()? Same for a few of the other functions below.

> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> new file mode 100644
> index 0000000..f25186f
> --- /dev/null
> +++ b/arch/arm64/kernel/setup.c

[...]

> +static void __init setup_processor(void)
> +{
> +	struct proc_info_list *list;
> +
> +	/*
> +	 * locate processor in the list of supported processor
> +	 * types.  The linker builds this table for us from the
> +	 * entries in arch/arm/mm/proc.S
> +	 */

Probably from arch/arm64/... somewhere?


[...]

> +	printk("CPU: %s [%08x] revision %d\n",
> +	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
> +
> +	sprintf(init_utsname()->machine, "aarch64");

> +	initial_boot_params = devtree;
> +	dt_root = of_get_flat_dt_root();
> +
> +	machine_name = of_get_flat_dt_prop(dt_root, "model", NULL);
> +	if (!machine_name)
> +		machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
> +	if (!machine_name)
> +		machine_name = "<unknown>";
> +	pr_info("Machine: %s\n", machine_name);

This property is an array of strings. It would be more valuable to print out
the entry that was matched for a platform instead of the provided one from the
device tree.


-Olof
Arnd Bergmann Aug. 15, 2012, 1:20 p.m. UTC | #2
On Tuesday 14 August 2012, Catalin Marinas wrote:

> +The AArch64 exception model is made up of a number of exception levels
> +(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
> +counterpart.  EL2 is the hypervisor level and exists only in non-secure
> +mode. EL3 is the highest priority level and exists only in secure mode.

I'm always confused by a description like this. It sounds like you cannot
have a hypervisor if you have code running in secure mode in EL3. What
I instead understand is that you enter non-secure mode by going from
EL3 into EL2.

> +2. Setup the device tree
> +-------------------------
> +
> +Requirement: MANDATORY
> +
> +The device tree blob (dtb) must be no bigger than 2 megabytes in size
> +and placed at a 2-megabyte boundary within the first 512 megabytes from
> +the start of the kernel image. This is to allow the kernel to map the
> +blob using a single section mapping in the initial page tables.

I've seen people put firmware for some peripherals into the device tree,
so that a device driver can grab a blob from there and load it into the
device, rather than calling request_firmware() which would fail if the
OS running on the system does not contain the blob. If such firmware is
too large, you end up violating the 2 MB limit you impose here.

Should we keep that limit and declare those use cases as invalid, or
should we try to make the boot protocol more flexible?

> diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
> new file mode 100644
> index 0000000..d766493
> --- /dev/null
> +++ b/arch/arm64/include/asm/setup.h
> @@ -0,0 +1,26 @@
> +#ifndef __ASM_SETUP_H
> +#define __ASM_SETUP_H
> +
> +#include <linux/types.h>
> +
> +#define COMMAND_LINE_SIZE 1024
> +
> +#endif

Is this necessary? The asm-generic version of this file allows 512 bytes,
which seems plenty.

> +unsigned int processor_id;
> +EXPORT_SYMBOL(processor_id);
> +
> +unsigned int elf_hwcap __read_mostly;
> +EXPORT_SYMBOL(elf_hwcap);

EXPORT_SYMBOL_GPL?

Neither of these looks like they should be used in drivers.

	Arnd
Olof Johansson Aug. 15, 2012, 5:06 p.m. UTC | #3
On Wed, Aug 15, 2012 at 01:20:02PM +0000, Arnd Bergmann wrote:
> On Tuesday 14 August 2012, Catalin Marinas wrote:
> 
> > +The AArch64 exception model is made up of a number of exception levels
> > +(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
> > +counterpart.  EL2 is the hypervisor level and exists only in non-secure
> > +mode. EL3 is the highest priority level and exists only in secure mode.
> 
> I'm always confused by a description like this. It sounds like you cannot
> have a hypervisor if you have code running in secure mode in EL3. What
> I instead understand is that you enter non-secure mode by going from
> EL3 into EL2.
> 
> > +2. Setup the device tree
> > +-------------------------
> > +
> > +Requirement: MANDATORY
> > +
> > +The device tree blob (dtb) must be no bigger than 2 megabytes in size
> > +and placed at a 2-megabyte boundary within the first 512 megabytes from
> > +the start of the kernel image. This is to allow the kernel to map the
> > +blob using a single section mapping in the initial page tables.
> 
> I've seen people put firmware for some peripherals into the device tree,
> so that a device driver can grab a blob from there and load it into the
> device, rather than calling request_firmware() which would fail if the
> OS running on the system does not contain the blob. If such firmware is
> too large, you end up violating the 2 MB limit you impose here.
> 
> Should we keep that limit and declare those use cases as invalid, or
> should we try to make the boot protocol more flexible?
> 
> > diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
> > new file mode 100644
> > index 0000000..d766493
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/setup.h
> > @@ -0,0 +1,26 @@
> > +#ifndef __ASM_SETUP_H
> > +#define __ASM_SETUP_H
> > +
> > +#include <linux/types.h>
> > +
> > +#define COMMAND_LINE_SIZE 1024
> > +
> > +#endif
> 
> Is this necessary? The asm-generic version of this file allows 512 bytes,
> which seems plenty.

Chrome OS on my system today uses a 553 character cmdline, in particular
because some of the device mapper arguments are in there (since we boot without
ramdisk). It adds up quickly.

I suggest keeping it common with x86 since those limits are what people
will be used to (2048).


-Olof
Catalin Marinas Aug. 15, 2012, 5:37 p.m. UTC | #4
Hi Olof,

On Wed, Aug 15, 2012 at 12:06:45AM +0100, Olof Johansson wrote:
> On Tue, Aug 14, 2012 at 06:52:03PM +0100, Catalin Marinas wrote:
> > +Before jumping into the kernel, the following conditions must be met:
> > +
> > +- Quiesce all DMA capable devices so that memory does not get
> > +  corrupted by bogus network packets or disk data.  This will save
> > +  you many hours of debug.
> > +
> > +- Primary CPU general-purpose register settings
> > +  x0 = physical address of device tree blob (dtb) in system RAM.
> > +
> > +- CPU mode
> > +  All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
> > +  IRQ and FIQ).
> > +  The CPU must be in either EL2 (RECOMMENDED in order to have access to
> > +  the virtualisation extensions) or non-secure EL1.
> > +
> > +- Caches, MMUs
> > +  The MMU must be off.
> > +  Instruction cache may be on or off.
> > +  Data cache must be off and invalidated.
> > +
> > +- Architected timers
> > +  CNTFRQ must be programmed with the timer frequency.
> > +  If entering the kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0)
> > +  set where available.
> > +
> > +- Coherency
> > +  All CPUs to be booted by the kernel must be part of the same coherency
> > +  domain on entry to the kernel.  This may require IMPLEMENTATION DEFINED
> > +  initialisation to enable the receiving of maintenance operations on
> > +  each CPU.
> > +
> > +- System registers
> > +  All writable architected system registers at the exception level where
> > +  the kernel image will be entered must be initialised by software at a
> > +  higher exception level to prevent execution in an UNKNOWN state.
> 
> Given the recent development of ARM platforms, you might want to mandate
> the state of IOMMUs as well (they should probably be off, since there
> should be no active DMA activity). Graphics would be the exception to
> this, since if you want to keep scanning out a splash screen, you'll
> have to keep doing DMA...

We'll enhance this document as we get hardware as it's not clear whether
we can simply mandate it to be off. We may have situations with some
simple IOMMU that is previously set up by the firmware and the kernel
doesn't get access to it. One example is the System MMU from ARM that
supports stage 2 (hypervisor) translations and you just run a guest
kernel without any control of the IOMMU.

> > +- The primary CPU must jump directly to the first instruction of the
> > +  kernel image.  The device tree blob passed by this CPU must contain
> > +  for each CPU node:
> > +
> > +    1. An 'enable-method' property. Currently, the only supported value
> > +       for this field is the string "spin-table".
> > +
> > +    2. A 'cpu-release-addr' property identifying a 64-bit,
> > +       zero-initialised memory location.
> 
> These would be good to have documented in the
> Documentation/devicetree/bindings hierarchy as well.

OK.

> > index 0000000..d766493
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/setup.h
> > @@ -0,0 +1,26 @@
> > +/*
> > + * Based on arch/arm/include/asm/setup.h
> > + *
> > + * Copyright (C) 1997-1999 Russell King
> > + * Copyright (C) 2012 ARM Ltd.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +#ifndef __ASM_SETUP_H
> > +#define __ASM_SETUP_H
> > +
> > +#include <linux/types.h>
> > +
> > +#define COMMAND_LINE_SIZE 1024
> 
> Probably not a huge deal, and other architectures seem to be all over
> the map on this, but you might want to go with a larger value now rather
> than later. 2048 or 4096 perhaps?

It looks like there are many different values, including the asm-generic
one which is 512. I'm happy to follow the x86 example and change it to
2048, it doesn't really matter.

> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > new file mode 100644
> > index 0000000..34ccdc0
> > --- /dev/null
> > +++ b/arch/arm64/kernel/head.S
> 
> [...]
> 
> > +/*
> > + * Setup common bits before finally enabling the MMU. Essentially this is just
> > + * loading the page table pointer and vector base registers.
> > + *
> > + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
> > + * the MMU.
> > + */
> > +__enable_mmu:
> 
> ENTRY()?

__enable_mmu is not used outside this file, so no need for ENTRY().

> > +	ldr	x5, =vectors
> > +	msr	vbar_el1, x5
> > +	msr	ttbr0_el1, x25			// load TTBR0
> > +	msr	ttbr1_el1, x26			// load TTBR1
> > +	isb
> > +	b	__turn_mmu_on
> > +ENDPROC(__enable_mmu)
> 
> ...or just END()? Same for a few of the other functions below.

ENDPROC() gives us ".type @function" in addition to END(). This proved
to be useful in the past for debugging symbols, unwind table (though we
don't have the latter on AArch64).

> > diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> > new file mode 100644
> > index 0000000..f25186f
> > --- /dev/null
> > +++ b/arch/arm64/kernel/setup.c
> 
> [...]
> 
> > +static void __init setup_processor(void)
> > +{
> > +	struct proc_info_list *list;
> > +
> > +	/*
> > +	 * locate processor in the list of supported processor
> > +	 * types.  The linker builds this table for us from the
> > +	 * entries in arch/arm/mm/proc.S
> > +	 */
> 
> Probably from arch/arm64/... somewhere?

Yes, I did a grep and found a few more.

> > +	printk("CPU: %s [%08x] revision %d\n",
> > +	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
> > +
> > +	sprintf(init_utsname()->machine, "aarch64");
> 
> > +	initial_boot_params = devtree;
> > +	dt_root = of_get_flat_dt_root();
> > +
> > +	machine_name = of_get_flat_dt_prop(dt_root, "model", NULL);
> > +	if (!machine_name)
> > +		machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
> > +	if (!machine_name)
> > +		machine_name = "<unknown>";
> > +	pr_info("Machine: %s\n", machine_name);
> 
> This property is an array of strings. It would be more valuable to print out
> the entry that was matched for a platform instead of the provided one from the
> device tree.

If we add machine_desc structure back, we could print which machine was
matched. But so far I try to keep the SoC code to a minimum and just do
the probing later in the SoC code (of_find_matching_node). Ideally we
shouldn't have any SoC code and just keep code in drivers but we'll see
how far we can get. We can discuss more details at the KS as I would
like the arm-soc team to get involved here.

Thanks.
Olof Johansson Aug. 15, 2012, 7:03 p.m. UTC | #5
Hi,

On Wed, Aug 15, 2012 at 06:37:11PM +0100, Catalin Marinas wrote:
> Hi Olof,
> 
> > Given the recent development of ARM platforms, you might want to mandate
> > the state of IOMMUs as well (they should probably be off, since there
> > should be no active DMA activity). Graphics would be the exception to
> > this, since if you want to keep scanning out a splash screen, you'll
> > have to keep doing DMA...
> 
> We'll enhance this document as we get hardware as it's not clear whether
> we can simply mandate it to be off. We may have situations with some
> simple IOMMU that is previously set up by the firmware and the kernel
> doesn't get access to it. One example is the System MMU from ARM that
> supports stage 2 (hypervisor) translations and you just run a guest
> kernel without any control of the IOMMU.

Ok, fair enough.

> > > +/*
> > > + * Setup common bits before finally enabling the MMU. Essentially this is just
> > > + * loading the page table pointer and vector base registers.
> > > + *
> > > + * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
> > > + * the MMU.
> > > + */
> > > +__enable_mmu:
> > 
> > ENTRY()?
> 
> __enable_mmu is not used outside this file, so no need for ENTRY().
> 
> > > +	ldr	x5, =vectors
> > > +	msr	vbar_el1, x5
> > > +	msr	ttbr0_el1, x25			// load TTBR0
> > > +	msr	ttbr1_el1, x26			// load TTBR1
> > > +	isb
> > > +	b	__turn_mmu_on
> > > +ENDPROC(__enable_mmu)
> > 
> > ...or just END()? Same for a few of the other functions below.
> 
> ENDPROC() gives us ".type @function" in addition to END(). This proved
> to be useful in the past for debugging symbols, unwind table (though we
> don't have the latter on AArch64).

A good as reason as any, sounds good.

> > > +static void __init setup_processor(void)
> > > +{
> > > +	struct proc_info_list *list;
> > > +
> > > +	/*
> > > +	 * locate processor in the list of supported processor
> > > +	 * types.  The linker builds this table for us from the
> > > +	 * entries in arch/arm/mm/proc.S
> > > +	 */
> > 
> > Probably from arch/arm64/... somewhere?
> 
> Yes, I did a grep and found a few more.

Yeah, I pointed out some other stale ARM-derived comments in other patches.

> > > +	printk("CPU: %s [%08x] revision %d\n",
> > > +	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
> > > +
> > > +	sprintf(init_utsname()->machine, "aarch64");
> > 
> > > +	initial_boot_params = devtree;
> > > +	dt_root = of_get_flat_dt_root();
> > > +
> > > +	machine_name = of_get_flat_dt_prop(dt_root, "model", NULL);
> > > +	if (!machine_name)
> > > +		machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
> > > +	if (!machine_name)
> > > +		machine_name = "<unknown>";
> > > +	pr_info("Machine: %s\n", machine_name);
> > 
> > This property is an array of strings. It would be more valuable to print out
> > the entry that was matched for a platform instead of the provided one from the
> > device tree.
> 
> If we add machine_desc structure back, we could print which machine was
> matched. But so far I try to keep the SoC code to a minimum and just do
> the probing later in the SoC code (of_find_matching_node). Ideally we
> shouldn't have any SoC code and just keep code in drivers but we'll see
> how far we can get. We can discuss more details at the KS as I would
> like the arm-soc team to get involved here.

Interesting approach, I wonder if it'll scale, in particular if it comes
to needing to do early setup and init. For device-level setup, generic
will probably work just fine. And if it doesn't, things can be changed
later. So it sounds like a good start.

Definitely something we should discuss. I suggest not doing it at KS
though, since only half of the arm-soc team is invited there. So the
ARM mini-summit or hallway around LPC is a better venue.


-Olof
Catalin Marinas Aug. 15, 2012, 7:53 p.m. UTC | #6
On 15 August 2012 20:03, Olof Johansson <olof@lixom.net> wrote:
> On Wed, Aug 15, 2012 at 06:37:11PM +0100, Catalin Marinas wrote:
>> If we add machine_desc structure back, we could print which machine was
>> matched. But so far I try to keep the SoC code to a minimum and just do
>> the probing later in the SoC code (of_find_matching_node). Ideally we
>> shouldn't have any SoC code and just keep code in drivers but we'll see
>> how far we can get. We can discuss more details at the KS as I would
>> like the arm-soc team to get involved here.
>
> Interesting approach, I wonder if it'll scale, in particular if it comes
> to needing to do early setup and init. For device-level setup, generic
> will probably work just fine. And if it doesn't, things can be changed
> later. So it sounds like a good start.
>
> Definitely something we should discuss. I suggest not doing it at KS
> though, since only half of the arm-soc team is invited there. So the
> ARM mini-summit or hallway around LPC is a better venue.

I was indeed thinking of the ARM mini-summit or hallway discussions.
The KS has different topics and it wouldn't have been of wide interest
anyway.
Catalin Marinas Aug. 16, 2012, 12:53 p.m. UTC | #7
On Wed, Aug 15, 2012 at 02:20:02PM +0100, Arnd Bergmann wrote:
> On Tuesday 14 August 2012, Catalin Marinas wrote:
> 
> > +The AArch64 exception model is made up of a number of exception levels
> > +(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
> > +counterpart.  EL2 is the hypervisor level and exists only in non-secure
> > +mode. EL3 is the highest priority level and exists only in secure mode.
> 
> I'm always confused by a description like this. It sounds like you cannot
> have a hypervisor if you have code running in secure mode in EL3. What
> I instead understand is that you enter non-secure mode by going from
> EL3 into EL2.

From EL3 you can drop to either EL2 (non-secure) or EL1 (secure or
non-secure), it's the highest privilege level. But we don't support the
kernel entering at EL3, the SoC firmware runs in this mode. I'll try to
make it clearer.

> > +2. Setup the device tree
> > +-------------------------
> > +
> > +Requirement: MANDATORY
> > +
> > +The device tree blob (dtb) must be no bigger than 2 megabytes in size
> > +and placed at a 2-megabyte boundary within the first 512 megabytes from
> > +the start of the kernel image. This is to allow the kernel to map the
> > +blob using a single section mapping in the initial page tables.
> 
> I've seen people put firmware for some peripherals into the device tree,
> so that a device driver can grab a blob from there and load it into the
> device, rather than calling request_firmware() which would fail if the
> OS running on the system does not contain the blob. If such firmware is
> too large, you end up violating the 2 MB limit you impose here.
> 
> Should we keep that limit and declare those use cases as invalid, or
> should we try to make the boot protocol more flexible?

I would be ok to allowing larger range here but we currently don't get
the information about the size of the dtb early enough to know how much
to map. We could make some other arbitrary choice if needed like being
in the first 16MB of RAM and we map this range.
Nicolas Pitre Aug. 16, 2012, 6:59 p.m. UTC | #8
On Tue, 14 Aug 2012, Catalin Marinas wrote:

> The patch adds the kernel booting and the initial setup code.
> Documentation/arm64/booting.txt describes the booting protocol on the
> AArch64 Linux kernel. This is subject to change following the work on
> boot standardisation, ACPI.
> 
> Signed-off-by: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

A few minor comments below, otherwise...

Acked-by: Nicolas Pitre <nico@linaro.org>

> ---
>  Documentation/arm64/booting.txt |  141 +++++++++++
>  arch/arm64/include/asm/setup.h  |   26 ++
>  arch/arm64/kernel/head.S        |  521 +++++++++++++++++++++++++++++++++++++++
>  arch/arm64/kernel/setup.c       |  357 +++++++++++++++++++++++++++
>  4 files changed, 1045 insertions(+), 0 deletions(-)
>  create mode 100644 Documentation/arm64/booting.txt
>  create mode 100644 arch/arm64/include/asm/setup.h
>  create mode 100644 arch/arm64/kernel/head.S
>  create mode 100644 arch/arm64/kernel/setup.c
> 
> diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
> new file mode 100644
> index 0000000..3197820
> --- /dev/null
> +++ b/Documentation/arm64/booting.txt
> @@ -0,0 +1,141 @@
> +			Booting AArch64 Linux
> +			=====================
> +
> +Author: Will Deacon <will.deacon@arm.com>
> +Date  : 25 April 2012
> +
> +This document is based on the ARM booting document by Russell King and
> +is relevant to all public releases of the AArch64 Linux kernel.
> +
> +The AArch64 exception model is made up of a number of exception levels
> +(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
> +counterpart.  EL2 is the hypervisor level and exists only in non-secure
> +mode. EL3 is the highest priority level and exists only in secure mode.
> +
> +For the purposes of this document, we will use the term `boot loader'
> +simply to define all software that executes on the CPU(s) before control
> +is passed to the Linux kernel.  This may include secure monitor and
> +hypervisor code, or it may just be a handful of instructions for
> +preparing a minimal boot environment.
> +
> +Essentially, the boot loader should provide (as a minimum) the
> +following:
> +
> +1. Setup and initialise the RAM
> +2. Setup the device tree
> +3. Decompress the kernel image
> +4. Call the kernel image
> +
> +
> +1. Setup and initialise RAM
> +---------------------------
> +
> +Requirement: MANDATORY
> +
> +The boot loader is expected to find and initialise all RAM that the
> +kernel will use for volatile data storage in the system.  It performs
> +this in a machine dependent manner.  (It may use internal algorithms
> +to automatically locate and size all RAM, or it may use knowledge of
> +the RAM in the machine, or any other method the boot loader designer
> +sees fit.)
> +
> +
> +2. Setup the device tree
> +-------------------------
> +
> +Requirement: MANDATORY
> +
> +The device tree blob (dtb) must be no bigger than 2 megabytes in size
> +and placed at a 2-megabyte boundary within the first 512 megabytes from
> +the start of the kernel image. This is to allow the kernel to map the
> +blob using a single section mapping in the initial page tables.

It might be a good idea to specify the minimum information that should 
be contained in the DTB.  Memory size is certainly one such item.

> +3. Decompress the kernel image
> +------------------------------
> +
> +Requirement: OPTIONAL
> +
> +The AArch64 kernel does not provide a decompressor and therefore
> +requires gzip decompression to be performed by the boot loader if the
> +default Image.gz target is used.  For bootloaders that do not implement
> +this requirement, the larger Image target is available instead.

Some people will want to use bzip2 or whatever other decompressor du 
jour.  Maybe this shouldn't be gzip specific, or just presented as a 
possible option?

> +4. Call the kernel image
> +------------------------
> +
> +Requirement: MANDATORY
> +
> +The decompressed kernel image contains a 32-byte header as follows:
> +
> +  u32 magic	= 0x14000008;	/* branch to stext, little-endian */
> +  u32 res0	= 0;		/* reserved */
> +  u64 text_offset;		/* Image load offset */
> +  u64 res1	= 0;		/* reserved */
> +  u64 res2	= 0;		/* reserved */
> +
> +The image must be placed at the specified offset (currently 0x80000)
> +from the start of the system RAM and called there. The start of the
> +system RAM must be aligned to 2MB.
> +
> +Before jumping into the kernel, the following conditions must be met:
> +
> +- Quiesce all DMA capable devices so that memory does not get
> +  corrupted by bogus network packets or disk data.  This will save
> +  you many hours of debug.
> +
> +- Primary CPU general-purpose register settings
> +  x0 = physical address of device tree blob (dtb) in system RAM.

I think you should mandate that some additional registers be explicitly 
initialized to 0 for possible future usage (and also mentioned in the 
corresponding code comment).  We have that issue on ARM32 where it is 
unclear if r2 contains a valid ATAG/DTB address or not as its content 
was not defined before.

[...]


Nicolas
Tony Lindgren Aug. 17, 2012, 8:56 a.m. UTC | #9
* Catalin Marinas <catalin.marinas@arm.com> [120814 11:00]:
> +3. Decompress the kernel image
> +------------------------------
> +
> +Requirement: OPTIONAL
> +
> +The AArch64 kernel does not provide a decompressor and therefore
> +requires gzip decompression to be performed by the boot loader if the
> +default Image.gz target is used.  For bootloaders that do not implement
> +this requirement, the larger Image target is available instead.

Maybe add something here about why AArch64 does not provide a
decompressor? That's something everybody will wonder while reading
this part.

> +- Caches, MMUs
> +  The MMU must be off.
> +  Instruction cache may be on or off.
> +  Data cache must be off and invalidated.

External caches must be configured by bootloader but set to disabled
state?

Other than that:

Acked-by: Tony Lindgren <tony@atomide.com>
Santosh Shilimkar Aug. 17, 2012, 9:41 a.m. UTC | #10
On Tuesday 14 August 2012 11:22 PM, Catalin Marinas wrote:
> The patch adds the kernel booting and the initial setup code.
> Documentation/arm64/booting.txt describes the booting protocol on the
> AArch64 Linux kernel. This is subject to change following the work on
> boot standardisation, ACPI.
>
> Signed-off-by: Will Deacon<will.deacon@arm.com>
> Signed-off-by: Catalin Marinas<catalin.marinas@arm.com>
> ---
>   Documentation/arm64/booting.txt |  141 +++++++++++
>   arch/arm64/include/asm/setup.h  |   26 ++
>   arch/arm64/kernel/head.S        |  521 +++++++++++++++++++++++++++++++++++++++
>   arch/arm64/kernel/setup.c       |  357 +++++++++++++++++++++++++++
>   4 files changed, 1045 insertions(+), 0 deletions(-)
>   create mode 100644 Documentation/arm64/booting.txt
>   create mode 100644 arch/arm64/include/asm/setup.h
>   create mode 100644 arch/arm64/kernel/head.S
>   create mode 100644 arch/arm64/kernel/setup.c
>
> diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
> new file mode 100644
> index 0000000..3197820
> --- /dev/null
> +++ b/Documentation/arm64/booting.txt

[...]

> +
> +The boot loader is expected to enter the kernel on each CPU in the
> +following manner:
> +
> +- The primary CPU must jump directly to the first instruction of the
> +  kernel image.  The device tree blob passed by this CPU must contain
> +  for each CPU node:
> +
> +    1. An 'enable-method' property. Currently, the only supported value
> +       for this field is the string "spin-table".
> +
> +    2. A 'cpu-release-addr' property identifying a 64-bit,
> +       zero-initialised memory location.
> +
> +  It is expected that the bootloader will generate these device tree
> +  properties and insert them into the blob prior to kernel entry.
> +
> +- Any secondary CPUs must spin outside of the kernel in a reserved area
> +  of memory (communicated to the kernel by a /memreserve/ region in the
> +  device tree) polling their cpu-release-addr location, which must be
> +  contained in the reserved region.  A wfe instruction may be inserted
> +  to reduce the overhead of the busy-loop and a sev will be issued by
> +  the primary CPU.  When a read of the location pointed to by the
> +  cpu-release-addr returns a non-zero value, the CPU must jump directly
> +  to this value.

So you expect all the secondary CPUs to be in wakeup state and probably
looping in WFE for a signal from kernel to boot. There is one issue
with this requirement though. For large CPU system, you need to reset
all the CPUs and hit this waiting loop. This will lead to large inrush
current need at bootup which may be not be supported. To avoid this
issue, secondary CPUs are kept in OFF state and then they are woken
up from kernel one by one whenever they need to be brought into the
system. This requirement should be considered.


> diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
> new file mode 100644
> index 0000000..d766493
> --- /dev/null
> +++ b/arch/arm64/include/asm/setup.h
> @@ -0,0 +1,26 @@
> +/*
> + * Based on arch/arm/include/asm/setup.h
> + *
> + * Copyright (C) 1997-1999 Russell King
> + * Copyright (C) 2012 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see<http://www.gnu.org/licenses/>.
> + */
> +#ifndef __ASM_SETUP_H
> +#define __ASM_SETUP_H
> +
> +#include<linux/types.h>
> +
> +#define COMMAND_LINE_SIZE 1024
> +
> +#endif
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> new file mode 100644
> index 0000000..34ccdc0
> --- /dev/null
> +++ b/arch/arm64/kernel/head.S
> @@ -0,0 +1,521 @@
> +/*
> + * Low-level CPU initialisation
> + * Based on arch/arm/kernel/head.S
> + *
> + * Copyright (C) 1994-2002 Russell King
> + * Copyright (C) 2003-2012 ARM Ltd.
> + * Authors:	Catalin Marinas<catalin.marinas@arm.com>
> + *		Will Deacon<will.deacon@arm.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see<http://www.gnu.org/licenses/>.
> + */
> +

[..]

> +
> +	/*
> +	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
> +	 */
> +	b	stext				// branch to kernel start, magic
> +	.long	0				// reserved
> +	.quad	TEXT_OFFSET			// Image load offset from start of RAM
> +	.quad	0				// reserved
> +	.quad	0				// reserved
> +

Minor nit. Avoid C++ commenting style "//"  here and rest of the patch.

Regards
santosh
Catalin Marinas Aug. 17, 2012, 10:05 a.m. UTC | #11
On Fri, Aug 17, 2012 at 10:41:10AM +0100, Santosh Shilimkar wrote:
> On Tuesday 14 August 2012 11:22 PM, Catalin Marinas wrote:
> > +The boot loader is expected to enter the kernel on each CPU in the
> > +following manner:
> > +
> > +- The primary CPU must jump directly to the first instruction of the
> > +  kernel image.  The device tree blob passed by this CPU must contain
> > +  for each CPU node:
> > +
> > +    1. An 'enable-method' property. Currently, the only supported value
> > +       for this field is the string "spin-table".
> > +
> > +    2. A 'cpu-release-addr' property identifying a 64-bit,
> > +       zero-initialised memory location.
> > +
> > +  It is expected that the bootloader will generate these device tree
> > +  properties and insert them into the blob prior to kernel entry.
> > +
> > +- Any secondary CPUs must spin outside of the kernel in a reserved area
> > +  of memory (communicated to the kernel by a /memreserve/ region in the
> > +  device tree) polling their cpu-release-addr location, which must be
> > +  contained in the reserved region.  A wfe instruction may be inserted
> > +  to reduce the overhead of the busy-loop and a sev will be issued by
> > +  the primary CPU.  When a read of the location pointed to by the
> > +  cpu-release-addr returns a non-zero value, the CPU must jump directly
> > +  to this value.
> 
> So you expect all the secondary CPUs to be in wakeup state and probably
> looping in WFE for a signal from kernel to boot. There is one issue
> with this requirement though. For large CPU system, you need to reset
> all the CPUs and hit this waiting loop. This will lead to large inrush
> current need at bootup which may be not be supported. To avoid this
> issue, secondary CPUs are kept in OFF state and then they are woken
> up from kernel one by one whenever they need to be brought into the
> system. This requirement should be considered.

I agree, this part will be extended. That's one method that we currently
support and suitable to the model.

The better method is the SMC standardisation that Charles Garcia-Tobin
has written (to be made available soon) and was presented at the last
Linaro Connect in HK. Given that the CPU power is usually controlled by
the secure side, we'll ask for an SMC to be issued for waking up
secondary CPUs, so it's up to the secure firmware to write the correct
hardware registers.

> > --- /dev/null
> > +++ b/arch/arm64/kernel/head.S
> [..]
> > +	/*
> > +	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
> > +	 */
> > +	b	stext				// branch to kernel start, magic
> > +	.long	0				// reserved
> > +	.quad	TEXT_OFFSET			// Image load offset from start of RAM
> > +	.quad	0				// reserved
> > +	.quad	0				// reserved
> > +
> 
> Minor nit. Avoid C++ commenting style "//"  here and rest of the patch.

That's not C++ comment style, it's the *official* assembly comment style
for AArch64 ('@' is no longer supported).
Santosh Shilimkar Aug. 17, 2012, 10:10 a.m. UTC | #12
On Fri, Aug 17, 2012 at 3:35 PM, Catalin Marinas
<catalin.marinas@arm.com> wrote:
>
> On Fri, Aug 17, 2012 at 10:41:10AM +0100, Santosh Shilimkar wrote:
> > On Tuesday 14 August 2012 11:22 PM, Catalin Marinas wrote:
> > > +The boot loader is expected to enter the kernel on each CPU in the
> > > +following manner:
> > > +
> > > +- The primary CPU must jump directly to the first instruction of the
> > > +  kernel image.  The device tree blob passed by this CPU must contain
> > > +  for each CPU node:
> > > +
> > > +    1. An 'enable-method' property. Currently, the only supported
> > > value
> > > +       for this field is the string "spin-table".
> > > +
> > > +    2. A 'cpu-release-addr' property identifying a 64-bit,
> > > +       zero-initialised memory location.
> > > +
> > > +  It is expected that the bootloader will generate these device tree
> > > +  properties and insert them into the blob prior to kernel entry.
> > > +
> > > +- Any secondary CPUs must spin outside of the kernel in a reserved
> > > area
> > > +  of memory (communicated to the kernel by a /memreserve/ region in
> > > the
> > > +  device tree) polling their cpu-release-addr location, which must be
> > > +  contained in the reserved region.  A wfe instruction may be
> > > inserted
> > > +  to reduce the overhead of the busy-loop and a sev will be issued by
> > > +  the primary CPU.  When a read of the location pointed to by the
> > > +  cpu-release-addr returns a non-zero value, the CPU must jump
> > > directly
> > > +  to this value.
> >
> > So you expect all the secondary CPUs to be in wakeup state and probably
> > looping in WFE for a signal from kernel to boot. There is one issue
> > with this requirement though. For large CPU system, you need to reset
> > all the CPUs and hit this waiting loop. This will lead to large inrush
> > current need at bootup which may be not be supported. To avoid this
> > issue, secondary CPUs are kept in OFF state and then they are woken
> > up from kernel one by one whenever they need to be brought into the
> > system. This requirement should be considered.
>
> I agree, this part will be extended. That's one method that we currently
> support and suitable to the model.
>
> The better method is the SMC standardisation that Charles Garcia-Tobin
> has written (to be made available soon) and was presented at the last
> Linaro Connect in HK. Given that the CPU power is usually controlled by
> the secure side, we'll ask for an SMC to be issued for waking up
> secondary CPUs, so it's up to the secure firmware to write the correct
> hardware registers.
>
Thanks for the information. SMC standardization would indeed help
to overcome some of these. Will wait for that information before
next set of questions.

> > > --- /dev/null
> > > +++ b/arch/arm64/kernel/head.S
> > [..]
> > > +   /*
> > > +    * DO NOT MODIFY. Image header expected by Linux boot-loaders.
> > > +    */
> > > +   b       stext                           // branch to kernel start,
> > > magic
> > > +   .long   0                               // reserved
> > > +   .quad   TEXT_OFFSET                     // Image load offset from
> > > start of RAM
> > > +   .quad   0                               // reserved
> > > +   .quad   0                               // reserved
> > > +
> >
> > Minor nit. Avoid C++ commenting style "//"  here and rest of the patch.
>
> That's not C++ comment style, it's the *official* assembly comment style
> for AArch64 ('@' is no longer supported).
>
Ok. Thanks for clarifying.

Regards
Santosh
Arnd Bergmann Aug. 17, 2012, 11:20 a.m. UTC | #13
On Thursday 16 August 2012, Nicolas Pitre wrote:
> > +3. Decompress the kernel image
> > +------------------------------
> > +
> > +Requirement: OPTIONAL
> > +
> > +The AArch64 kernel does not provide a decompressor and therefore
> > +requires gzip decompression to be performed by the boot loader if the
> > +default Image.gz target is used.  For bootloaders that do not implement
> > +this requirement, the larger Image target is available instead.
> 
> Some people will want to use bzip2 or whatever other decompressor du 
> jour.  Maybe this shouldn't be gzip specific, or just presented as a 
> possible option?

Good point. Whether this should be part of this document depends on
what assumptions we make about the boot loader getting the image
in the first place.

In the strict sense, we are documenting the interface between the boot
loader and the kernel here, which already specifies that  the kernel
must be uncompressed by the time we enter it. If the boot loader wants
to add its own encryption or compression methods, or its own headers
in front of the binary, the boot protocol isn't really impacted.

That said, I think it's a good idea to also specify what kind of
format we want to be used, e.g. a stripped ELF Image compressed
with one of gzip/bzip2/lzo/xz and with no other headers added,
on a vfat/ext4/btrfs formatted file system. There are probably a
lot of other things one might want to specify if we go down this
route. Or we could refer to the UEFI spec and mandate that the
same format that UEFI uses should be used here independent of
what boot loader is used. I think we can still allow other ways to
get to the image for deeply embedded systems, e.g. linking the
kernel into the boot loader as a blob on tightly constrained
systems. For that case, we'd only specify the interface between
boot loader and kernel as described above.

	Arnd
Tony Lindgren Aug. 17, 2012, 1:13 p.m. UTC | #14
* Shilimkar, Santosh <santosh.shilimkar@ti.com> [120817 03:11]:
> On Fri, Aug 17, 2012 at 3:35 PM, Catalin Marinas
> <catalin.marinas@arm.com> wrote:
> >
> > On Fri, Aug 17, 2012 at 10:41:10AM +0100, Santosh Shilimkar wrote:
> > >
> > > So you expect all the secondary CPUs to be in wakeup state and probably
> > > looping in WFE for a signal from kernel to boot. There is one issue
> > > with this requirement though. For large CPU system, you need to reset
> > > all the CPUs and hit this waiting loop. This will lead to large inrush
> > > current need at bootup which may be not be supported. To avoid this
> > > issue, secondary CPUs are kept in OFF state and then they are woken
> > > up from kernel one by one whenever they need to be brought into the
> > > system. This requirement should be considered.
> >
> > I agree, this part will be extended. That's one method that we currently
> > support and suitable to the model.
> >
> > The better method is the SMC standardisation that Charles Garcia-Tobin
> > has written (to be made available soon) and was presented at the last
> > Linaro Connect in HK. Given that the CPU power is usually controlled by
> > the secure side, we'll ask for an SMC to be issued for waking up
> > secondary CPUs, so it's up to the secure firmware to write the correct
> > hardware registers.
> >
> Thanks for the information. SMC standardization would indeed help
> to overcome some of these. Will wait for that information before
> next set of questions.

Yes please. If the SMC is not standardized for most calls at least,
we'll end up with a horrible mess of SoC specific calls like we
currently have. Related to that, the virtualization calls should be
also standardized so we don't end up with multiple different hypervisors
with different calls.

Regards,

Tony
Catalin Marinas Aug. 17, 2012, 1:45 p.m. UTC | #15
On Fri, Aug 17, 2012 at 12:20:40PM +0100, Arnd Bergmann wrote:
> On Thursday 16 August 2012, Nicolas Pitre wrote:
> > > +3. Decompress the kernel image
> > > +------------------------------
> > > +
> > > +Requirement: OPTIONAL
> > > +
> > > +The AArch64 kernel does not provide a decompressor and therefore
> > > +requires gzip decompression to be performed by the boot loader if the
> > > +default Image.gz target is used.  For bootloaders that do not implement
> > > +this requirement, the larger Image target is available instead.
> > 
> > Some people will want to use bzip2 or whatever other decompressor du 
> > jour.  Maybe this shouldn't be gzip specific, or just presented as a 
> > possible option?
> 
> Good point. Whether this should be part of this document depends on
> what assumptions we make about the boot loader getting the image
> in the first place.

It ended up here because there is a Makefile target, Image.gz, though I
haven't used it at all. I'll expand the text so that it is not
restricted to gzip.

> In the strict sense, we are documenting the interface between the boot
> loader and the kernel here, which already specifies that  the kernel
> must be uncompressed by the time we enter it. If the boot loader wants
> to add its own encryption or compression methods, or its own headers
> in front of the binary, the boot protocol isn't really impacted.

Yes.

> That said, I think it's a good idea to also specify what kind of
> format we want to be used, e.g. a stripped ELF Image compressed
> with one of gzip/bzip2/lzo/xz and with no other headers added,
> on a vfat/ext4/btrfs formatted file system. There are probably a
> lot of other things one might want to specify if we go down this
> route. Or we could refer to the UEFI spec and mandate that the
> same format that UEFI uses should be used here independent of
> what boot loader is used. I think we can still allow other ways to
> get to the image for deeply embedded systems, e.g. linking the
> kernel into the boot loader as a blob on tightly constrained
> systems. For that case, we'd only specify the interface between
> boot loader and kernel as described above.

At least the latter case is used on the model. For the other cases, it's
quite early to clearly specify what is needed as we don't currently have
a boot-loader other than UEFI. We may expect GRUB as well.
Catalin Marinas Aug. 17, 2012, 1:48 p.m. UTC | #16
On Fri, Aug 17, 2012 at 02:13:59PM +0100, Tony Lindgren wrote:
> * Shilimkar, Santosh <santosh.shilimkar@ti.com> [120817 03:11]:
> > On Fri, Aug 17, 2012 at 3:35 PM, Catalin Marinas
> > <catalin.marinas@arm.com> wrote:
> > >
> > > On Fri, Aug 17, 2012 at 10:41:10AM +0100, Santosh Shilimkar wrote:
> > > >
> > > > So you expect all the secondary CPUs to be in wakeup state and probably
> > > > looping in WFE for a signal from kernel to boot. There is one issue
> > > > with this requirement though. For large CPU system, you need to reset
> > > > all the CPUs and hit this waiting loop. This will lead to large inrush
> > > > current need at bootup which may be not be supported. To avoid this
> > > > issue, secondary CPUs are kept in OFF state and then they are woken
> > > > up from kernel one by one whenever they need to be brought into the
> > > > system. This requirement should be considered.
> > >
> > > I agree, this part will be extended. That's one method that we currently
> > > support and suitable to the model.
> > >
> > > The better method is the SMC standardisation that Charles Garcia-Tobin
> > > has written (to be made available soon) and was presented at the last
> > > Linaro Connect in HK. Given that the CPU power is usually controlled by
> > > the secure side, we'll ask for an SMC to be issued for waking up
> > > secondary CPUs, so it's up to the secure firmware to write the correct
> > > hardware registers.
> > >
> > Thanks for the information. SMC standardization would indeed help
> > to overcome some of these. Will wait for that information before
> > next set of questions.
> 
> Yes please. If the SMC is not standardized for most calls at least,
> we'll end up with a horrible mess of SoC specific calls like we
> currently have. Related to that, the virtualization calls should be
> also standardized so we don't end up with multiple different hypervisors
> with different calls.

For hypervisor, there are two kinds of API - one meant for power
management that is pretty much the same as the SMC and another specific
to the hypervisor solution (KVM, Xen etc.). The latter is specific to
the host kernel that's running as we start it at EL2 but it's not
standardised. It would be good indeed and there are other advantages
like self-virtualisation but it needs the KVM and Xen guys to come to a
common definition.
Nicolas Pitre Aug. 17, 2012, 6:21 p.m. UTC | #17
On Fri, 17 Aug 2012, Arnd Bergmann wrote:

> On Thursday 16 August 2012, Nicolas Pitre wrote:
> > Some people will want to use bzip2 or whatever other decompressor du 
> > jour.  Maybe this shouldn't be gzip specific, or just presented as a 
> > possible option?
> 
> Good point. Whether this should be part of this document depends on
> what assumptions we make about the boot loader getting the image
> in the first place.
> 
> In the strict sense, we are documenting the interface between the boot
> loader and the kernel here, which already specifies that  the kernel
> must be uncompressed by the time we enter it. If the boot loader wants
> to add its own encryption or compression methods, or its own headers
> in front of the binary, the boot protocol isn't really impacted.

Right.  And someone else will insist on wrapping the kernel into a boot 
loader specific image format e.g. u-Boot.  If all those variations could 
be kept out of the kernel build that would be a good thing.

That means the kernel should be wrapped/compressed/scrambled at 
installation time, not at build time.  This way the kernel image remains 
universal and flexibility in its installation is possible.

> That said, I think it's a good idea to also specify what kind of 
> format we want to be used, e.g. a stripped ELF Image compressed with 
> one of gzip/bzip2/lzo/xz and with no other headers added, on a 
> vfat/ext4/btrfs formatted file system. There are probably a lot of 
> other things one might want to specify if we go down this route. Or we 
> could refer to the UEFI spec and mandate that the same format that 
> UEFI uses should be used here independent of what boot loader is used. 
> I think we can still allow other ways to get to the image for deeply 
> embedded systems, e.g. linking the kernel into the boot loader as a 
> blob on tightly constrained systems. For that case, we'd only specify 
> the interface between boot loader and kernel as described above.

I don't think we'll have to concern ourselves with tightly constrained 
systems that much on ARM64.


Nicolas
Catalin Marinas Aug. 24, 2012, 9:50 a.m. UTC | #18
On Fri, Aug 17, 2012 at 02:13:59PM +0100, Tony Lindgren wrote:
> * Shilimkar, Santosh <santosh.shilimkar@ti.com> [120817 03:11]:
> > On Fri, Aug 17, 2012 at 3:35 PM, Catalin Marinas
> > <catalin.marinas@arm.com> wrote:
> > >
> > > On Fri, Aug 17, 2012 at 10:41:10AM +0100, Santosh Shilimkar wrote:
> > > >
> > > > So you expect all the secondary CPUs to be in wakeup state and probably
> > > > looping in WFE for a signal from kernel to boot. There is one issue
> > > > with this requirement though. For large CPU system, you need to reset
> > > > all the CPUs and hit this waiting loop. This will lead to large inrush
> > > > current need at bootup which may be not be supported. To avoid this
> > > > issue, secondary CPUs are kept in OFF state and then they are woken
> > > > up from kernel one by one whenever they need to be brought into the
> > > > system. This requirement should be considered.
> > >
> > > I agree, this part will be extended. That's one method that we currently
> > > support and suitable to the model.
> > >
> > > The better method is the SMC standardisation that Charles Garcia-Tobin
> > > has written (to be made available soon) and was presented at the last
> > > Linaro Connect in HK. Given that the CPU power is usually controlled by
> > > the secure side, we'll ask for an SMC to be issued for waking up
> > > secondary CPUs, so it's up to the secure firmware to write the correct
> > > hardware registers.
> > >
> > Thanks for the information. SMC standardization would indeed help
> > to overcome some of these. Will wait for that information before
> > next set of questions.
> 
> Yes please. If the SMC is not standardized for most calls at least,
> we'll end up with a horrible mess of SoC specific calls like we
> currently have. Related to that, the virtualization calls should be
> also standardized so we don't end up with multiple different hypervisors
> with different calls.

The Power State Coordination Interface initial proposal has been
published here:

http://infocenter.arm.com/help/topic/com.arm.doc.den0022a/index.html

(as with other ARM documents, they are public but free registration
required).
diff mbox

Patch

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
new file mode 100644
index 0000000..3197820
--- /dev/null
+++ b/Documentation/arm64/booting.txt
@@ -0,0 +1,141 @@ 
+			Booting AArch64 Linux
+			=====================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date  : 25 April 2012
+
+This document is based on the ARM booting document by Russell King and
+is relevant to all public releases of the AArch64 Linux kernel.
+
+The AArch64 exception model is made up of a number of exception levels
+(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
+counterpart.  EL2 is the hypervisor level and exists only in non-secure
+mode. EL3 is the highest priority level and exists only in secure mode.
+
+For the purposes of this document, we will use the term `boot loader'
+simply to define all software that executes on the CPU(s) before control
+is passed to the Linux kernel.  This may include secure monitor and
+hypervisor code, or it may just be a handful of instructions for
+preparing a minimal boot environment.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM
+2. Setup the device tree
+3. Decompress the kernel image
+4. Call the kernel image
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Requirement: MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system.  It performs
+this in a machine dependent manner.  (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Setup the device tree
+-------------------------
+
+Requirement: MANDATORY
+
+The device tree blob (dtb) must be no bigger than 2 megabytes in size
+and placed at a 2-megabyte boundary within the first 512 megabytes from
+the start of the kernel image. This is to allow the kernel to map the
+blob using a single section mapping in the initial page tables.
+
+
+3. Decompress the kernel image
+------------------------------
+
+Requirement: OPTIONAL
+
+The AArch64 kernel does not provide a decompressor and therefore
+requires gzip decompression to be performed by the boot loader if the
+default Image.gz target is used.  For bootloaders that do not implement
+this requirement, the larger Image target is available instead.
+
+
+4. Call the kernel image
+------------------------
+
+Requirement: MANDATORY
+
+The decompressed kernel image contains a 32-byte header as follows:
+
+  u32 magic	= 0x14000008;	/* branch to stext, little-endian */
+  u32 res0	= 0;		/* reserved */
+  u64 text_offset;		/* Image load offset */
+  u64 res1	= 0;		/* reserved */
+  u64 res2	= 0;		/* reserved */
+
+The image must be placed at the specified offset (currently 0x80000)
+from the start of the system RAM and called there. The start of the
+system RAM must be aligned to 2MB.
+
+Before jumping into the kernel, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+  corrupted by bogus network packets or disk data.  This will save
+  you many hours of debug.
+
+- Primary CPU general-purpose register settings
+  x0 = physical address of device tree blob (dtb) in system RAM.
+
+- CPU mode
+  All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
+  IRQ and FIQ).
+  The CPU must be in either EL2 (RECOMMENDED in order to have access to
+  the virtualisation extensions) or non-secure EL1.
+
+- Caches, MMUs
+  The MMU must be off.
+  Instruction cache may be on or off.
+  Data cache must be off and invalidated.
+
+- Architected timers
+  CNTFRQ must be programmed with the timer frequency.
+  If entering the kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0)
+  set where available.
+
+- Coherency
+  All CPUs to be booted by the kernel must be part of the same coherency
+  domain on entry to the kernel.  This may require IMPLEMENTATION DEFINED
+  initialisation to enable the receiving of maintenance operations on
+  each CPU.
+
+- System registers
+  All writable architected system registers at the exception level where
+  the kernel image will be entered must be initialised by software at a
+  higher exception level to prevent execution in an UNKNOWN state.
+
+The boot loader is expected to enter the kernel on each CPU in the
+following manner:
+
+- The primary CPU must jump directly to the first instruction of the
+  kernel image.  The device tree blob passed by this CPU must contain
+  for each CPU node:
+
+    1. An 'enable-method' property. Currently, the only supported value
+       for this field is the string "spin-table".
+
+    2. A 'cpu-release-addr' property identifying a 64-bit,
+       zero-initialised memory location.
+
+  It is expected that the bootloader will generate these device tree
+  properties and insert them into the blob prior to kernel entry.
+
+- Any secondary CPUs must spin outside of the kernel in a reserved area
+  of memory (communicated to the kernel by a /memreserve/ region in the
+  device tree) polling their cpu-release-addr location, which must be
+  contained in the reserved region.  A wfe instruction may be inserted
+  to reduce the overhead of the busy-loop and a sev will be issued by
+  the primary CPU.  When a read of the location pointed to by the
+  cpu-release-addr returns a non-zero value, the CPU must jump directly
+  to this value.
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
new file mode 100644
index 0000000..d766493
--- /dev/null
+++ b/arch/arm64/include/asm/setup.h
@@ -0,0 +1,26 @@ 
+/*
+ * Based on arch/arm/include/asm/setup.h
+ *
+ * Copyright (C) 1997-1999 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SETUP_H
+#define __ASM_SETUP_H
+
+#include <linux/types.h>
+
+#define COMMAND_LINE_SIZE 1024
+
+#endif
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
new file mode 100644
index 0000000..34ccdc0
--- /dev/null
+++ b/arch/arm64/kernel/head.S
@@ -0,0 +1,521 @@ 
+/*
+ * Low-level CPU initialisation
+ * Based on arch/arm/kernel/head.S
+ *
+ * Copyright (C) 1994-2002 Russell King
+ * Copyright (C) 2003-2012 ARM Ltd.
+ * Authors:	Catalin Marinas <catalin.marinas@arm.com>
+ *		Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+#include <asm/assembler.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/memory.h>
+#include <asm/thread_info.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+
+/*
+ * swapper_pg_dir is the virtual address of the initial page table. We place
+ * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has
+ * 2 pages and is placed below swapper_pg_dir.
+ */
+#define KERNEL_RAM_VADDR	(PAGE_OFFSET + TEXT_OFFSET)
+
+#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000
+#error KERNEL_RAM_VADDR must start at 0xXXX80000
+#endif
+
+#define SWAPPER_DIR_SIZE	(3 * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(2 * PAGE_SIZE)
+
+	.globl	swapper_pg_dir
+	.equ	swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE
+
+	.globl	idmap_pg_dir
+	.equ	idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE
+
+	.macro	pgtbl, ttb0, ttb1, phys
+	add	\ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE
+	sub	\ttb0, \ttb1, #IDMAP_DIR_SIZE
+	.endm
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define BLOCK_SHIFT	PAGE_SHIFT
+#define BLOCK_SIZE	PAGE_SIZE
+#else
+#define BLOCK_SHIFT	SECTION_SHIFT
+#define BLOCK_SIZE	SECTION_SIZE
+#endif
+
+#define KERNEL_START	KERNEL_RAM_VADDR
+#define KERNEL_END	_end
+
+/*
+ * Initial memory map attributes.
+ */
+#ifndef CONFIG_SMP
+#define PTE_FLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_AF
+#define PMD_FLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_SECT_AF
+#else
+#define PTE_FLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_AF | PTE_SHARED
+#define PMD_FLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_SECT_AF | PMD_SECT_S
+#endif
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define MM_MMUFLAGS	PTE_TYPE_PAGE | PTE_FLAGS
+#define IO_MMUFLAGS	PTE_TYPE_PAGE | PTE_XN | PTE_FLAGS
+#else
+#define MM_MMUFLAGS	PMD_TYPE_SECT | PMD_FLAGS
+#define IO_MMUFLAGS	PMD_TYPE_SECT | PMD_SECT_XN | PMD_FLAGS
+#endif
+
+/*
+ * Kernel startup entry point.
+ * ---------------------------
+ *
+ * The requirements are:
+ *   MMU = off, D-cache = off, I-cache = on or off,
+ *   x0 = physical address to the FDT blob.
+ *
+ * This code is mostly position independent so you call this at
+ * __pa(PAGE_OFFSET + TEXT_OFFSET).
+ *
+ * Note that the callee-saved registers are used for storing variables
+ * that are useful before the MMU is enabled. The allocations are described
+ * in the entry routines.
+ */
+	__HEAD
+
+	/*
+	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
+	 */
+	b	stext				// branch to kernel start, magic
+	.long	0				// reserved
+	.quad	TEXT_OFFSET			// Image load offset from start of RAM
+	.quad	0				// reserved
+	.quad	0				// reserved
+
+ENTRY(stext)
+	mov	x21, x0				// x21=FDT
+	bl	el2_setup			// Drop to EL1
+	mrs	x22, midr_el1			// x22=cpuid
+	mov	x0, x22
+	bl	__lookup_processor_type
+	mov	x23, x0				// x23=procinfo
+	cbz	x23, __error_p			// invalid processor (x23=0)?
+	bl	__calc_phys_offset		// x24=phys offset
+	bl	__vet_fdt
+	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
+	/*
+	 * The following calls CPU specific code in a position independent
+	 * manner. See arch/arm64/mm/proc.S for details. x23 = base of
+	 * cpu_proc_info structure selected by __lookup_processor_type above.
+	 * On return, the CPU will be ready for the MMU to be turned on and
+	 * the TCR will have been set.
+	 */
+	ldr	x27, __switch_data		// address to jump to after
+						// MMU has been enabled
+	adr	lr, __enable_mmu		// return (PIC) address
+	add	x12, x23, #PROCINFO_INITFUNC
+	br	x12				// initialise processor
+ENDPROC(stext)
+
+/*
+ * If we're fortunate enough to boot at EL2, ensure that the world is
+ * sane before dropping to EL1.
+ */
+ENTRY(el2_setup)
+	mrs	x0, CurrentEL
+	cmp	x0, #PSR_MODE_EL2t
+	ccmp	x0, #PSR_MODE_EL2h, #0x4, ne
+	b.eq	1f
+	ret
+
+	/* Hyp configuration. */
+1:	mov	x0, #(1 << 31)			// 64-bit EL1
+	msr	hcr_el2, x0
+
+	/* Generic timers. */
+	mrs	x0, cnthctl_el2
+	orr	x0, x0, #3			// Enable EL1 physical timers
+	msr	cnthctl_el2, x0
+
+	/* Populate ID registers. */
+	mrs	x0, midr_el1
+	mrs	x1, mpidr_el1
+	msr	vpidr_el2, x0
+	msr	vmpidr_el2, x1
+
+	/* sctlr_el1 */
+	mov	x0, #0x0800			// Set/clear RES{1,0} bits
+	movk	x0, #0x30d0, lsl #16
+	msr	sctlr_el1, x0
+
+	/* Coprocessor traps. */
+	mov	x0, #0x33ff
+	msr	cptr_el2, x0			// Disable copro. traps to EL2
+
+#ifdef CONFIG_AARCH32_EMULATION
+	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
+#endif
+
+	/* spsr */
+	mov	x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, x0
+	msr	elr_el2, lr
+	eret
+ENDPROC(el2_setup)
+
+	.align	3
+2:	.quad	.
+	.quad	PAGE_OFFSET
+
+#ifdef CONFIG_SMP
+	.pushsection    .smp.pen.text, "ax"
+	.align	3
+1:	.quad	.
+	.quad	secondary_holding_pen_release
+
+	/*
+	 * This provides a "holding pen" for platforms to hold all secondary
+	 * cores are held until we're ready for them to initialise.
+	 */
+ENTRY(secondary_holding_pen)
+	bl	el2_setup			// Drop to EL1
+	mrs	x0, mpidr_el1
+	and	x0, x0, #15			// CPU number
+	adr	x1, 1b
+	ldp	x2, x3, [x1]
+	sub	x1, x1, x2
+	add	x3, x3, x1
+pen:	ldr	x4, [x3]
+	cmp	x4, x0
+	b.eq	secondary_startup
+	wfe
+	b	pen
+ENDPROC(secondary_holding_pen)
+	.popsection
+
+ENTRY(secondary_startup)
+	/*
+	 * Common entry point for secondary CPUs.
+	 */
+	mrs	x22, midr_el1			// x22=cpuid
+	mov	x0, x22
+	bl	__lookup_processor_type
+	mov	x23, x0				// x23=procinfo
+	cbz	x23, __error_p			// invalid processor (x23=0)?
+
+	bl	__calc_phys_offset		// x24=phys offset
+	pgtbl	x25, x26, x24			// x25=TTBR0, x26=TTBR1
+	add	x12, x23, #PROCINFO_INITFUNC
+	blr	x12				// initialise processor
+
+	ldr	x21, =secondary_data
+	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
+	b	__enable_mmu
+ENDPROC(secondary_startup)
+
+ENTRY(__secondary_switched)
+	ldr	x0, [x21]			// get secondary_data.stack
+	mov	sp, x0
+	mov	x29, #0
+	b	secondary_start_kernel
+ENDPROC(__secondary_switched)
+#endif	/* CONFIG_SMP */
+
+/*
+ * Setup common bits before finally enabling the MMU. Essentially this is just
+ * loading the page table pointer and vector base registers.
+ *
+ * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
+ * the MMU.
+ */
+__enable_mmu:
+	ldr	x5, =vectors
+	msr	vbar_el1, x5
+	msr	ttbr0_el1, x25			// load TTBR0
+	msr	ttbr1_el1, x26			// load TTBR1
+	isb
+	b	__turn_mmu_on
+ENDPROC(__enable_mmu)
+
+/*
+ * Enable the MMU. This completely changes the structure of the visible memory
+ * space. You will not be able to trace execution through this.
+ *
+ *  x0  = system control register
+ *  x27 = *virtual* address to jump to upon completion
+ *
+ * other registers depend on the function called upon completion
+ */
+	.align	6
+__turn_mmu_on:
+	msr	sctlr_el1, x0
+	isb
+	br	x27
+ENDPROC(__turn_mmu_on)
+
+/*
+ * Calculate the start of physical memory.
+ */
+__calc_phys_offset:
+	adr	x0, 1f
+	ldp	x1, x2, [x0]
+	sub	x3, x0, x1			// PHYS_OFFSET - PAGE_OFFSET
+	add	x24, x2, x3			// x24=PHYS_OFFSET
+	ret
+ENDPROC(__calc_phys_offset)
+
+	.align 3
+1:	.quad	.
+	.quad	PAGE_OFFSET
+
+/*
+ * Macro to populate the PGD for the corresponding block entry in the next
+ * level (tbl) for the given virtual address.
+ *
+ * Preserves:	pgd, tbl, virt
+ * Corrupts:	tmp1, tmp2
+ */
+	.macro	create_pgd_entry, pgd, tbl, virt, tmp1, tmp2
+	lsr	\tmp1, \virt, #PGDIR_SHIFT
+	and	\tmp1, \tmp1, #PTRS_PER_PGD - 1	// PGD index
+	orr	\tmp2, \tbl, #3			// PGD entry table type
+	str	\tmp2, [\pgd, \tmp1, lsl #3]
+	.endm
+
+/*
+ * Macro to populate block entries in the page table for the start..end
+ * virtual range (inclusive).
+ *
+ * Preserves:	tbl, flags
+ * Corrupts:	phys, start, end, pstate
+ */
+	.macro	create_block_map, tbl, flags, phys, start, end, idmap=0
+	lsr	\phys, \phys, #BLOCK_SHIFT
+	.if	\idmap
+	and	\start, \phys, #PTRS_PER_PTE - 1	// table index
+	.else
+	lsr	\start, \start, #BLOCK_SHIFT
+	and	\start, \start, #PTRS_PER_PTE - 1	// table index
+	.endif
+	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
+	.ifnc	\start,\end
+	lsr	\end, \end, #BLOCK_SHIFT
+	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
+	.endif
+9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
+	.ifnc	\start,\end
+	add	\start, \start, #1			// next entry
+	add	\phys, \phys, #BLOCK_SIZE		// next block
+	cmp	\start, \end
+	b.ls	9999b
+	.endif
+	.endm
+
+/*
+ * Setup the initial page tables. We only setup the barest amount which is
+ * required to get the kernel running. The following sections are required:
+ *   - identity mapping to enable the MMU (low address, TTBR0)
+ *   - first few MB of the kernel linear mapping to jump to once the MMU has
+ *     been enabled, including the FDT blob (TTBR1)
+ */
+__create_page_tables:
+	pgtbl	x25, x26, x24			// idmap_pg_dir and swapper_pg_dir addresses
+
+	/*
+	 * Clear the idmap and swapper page tables.
+	 */
+	mov	x0, x25
+	add	x6, x26, #SWAPPER_DIR_SIZE
+1:	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	cmp	x0, x6
+	b.lo	1b
+
+	ldr	x7, =MM_MMUFLAGS
+
+	/*
+	 * Create the identity mapping.
+	 */
+	add	x0, x25, #PAGE_SIZE		// section table address
+	adr	x3, __turn_mmu_on		// virtual/physical address
+	create_pgd_entry x25, x0, x3, x5, x6
+	create_block_map x0, x7, x3, x5, x5, idmap=1
+
+	/*
+	 * Map the kernel image (starting with PHYS_OFFSET).
+	 */
+	add	x0, x26, #PAGE_SIZE		// section table address
+	mov	x5, #PAGE_OFFSET
+	create_pgd_entry x26, x0, x5, x3, x6
+	ldr	x6, =KERNEL_END - 1
+	mov	x3, x24				// phys offset
+	create_block_map x0, x7, x3, x5, x6
+
+	/*
+	 * Map the FDT blob (maximum 2MB; must be within 512MB of
+	 * PHYS_OFFSET).
+	 */
+	mov	x3, x21				// FDT phys address
+	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned
+	mov	x6, #PAGE_OFFSET
+	sub	x5, x3, x24			// subtract PHYS_OFFSET
+	tst	x5, #~((1 << 29) - 1)		// within 512MB?
+	csel	x21, xzr, x21, ne		// zero the FDT pointer
+	b.ne	1f
+	add	x5, x5, x6			// __va(FDT blob)
+	add	x6, x5, #1 << 21		// 2MB for the FDT blob
+	sub	x6, x6, #1			// inclusive range
+	create_block_map x0, x7, x3, x5, x6
+1:
+	ret
+ENDPROC(__create_page_tables)
+	.ltorg
+
+	.align	3
+	.type	__switch_data, %object
+__switch_data:
+	.quad	__mmap_switched
+	.quad	__data_loc			// x4
+	.quad	_data				// x5
+	.quad	__bss_start			// x6
+	.quad	_end				// x7
+	.quad	processor_id			// x4
+	.quad	__fdt_pointer			// x5
+	.quad	memstart_addr			// x6
+	.quad	init_thread_union + THREAD_START_SP // sp
+
+/*
+ * The following fragment of code is executed with the MMU on in MMU mode, and
+ * uses absolute addresses; this is not position independent.
+ */
+__mmap_switched:
+	adr	x3, __switch_data + 8
+
+	ldp	x4, x5, [x3], #16
+	ldp	x6, x7, [x3], #16
+	cmp	x4, x5				// Copy data segment if needed
+1:	ccmp	x5, x6, #4, ne
+	b.eq	2f
+	ldr	x16, [x4], #8
+	str	x16, [x5], #8
+	b	1b
+2:
+1:	cmp	x6, x7
+	b.hs	2f
+	str	xzr, [x6], #8			// Clear BSS
+	b	1b
+2:
+	ldp	x4, x5, [x3], #16
+	ldr	x6, [x3], #8
+	ldr	x16, [x3]
+	mov	sp, x16
+	str	x22, [x4]			// Save processor ID
+	str	x21, [x5]			// Save FDT pointer
+	str	x24, [x6]			// Save PHYS_OFFSET
+	mov	x29, #0
+	b	start_kernel
+ENDPROC(__mmap_switched)
+
+/*
+ * Exception handling. Something went wrong and we can't proceed. We ought to
+ * tell the user, but since we don't have any guarantee that we're even
+ * running on the right architecture, we do virtually nothing.
+ */
+__error_p:
+ENDPROC(__error_p)
+
+__error:
+1:	nop
+	b	1b
+ENDPROC(__error)
+
+/*
+ * Read processor ID register and look up in the linker-built supported
+ * processor list. Note that we can't use the absolute addresses for the
+ * __proc_info lists since we aren't running with the MMU on (and therefore,
+ * we are not in the correct address space). We have to calculate the offset.
+ *
+ * This routine can be called via C code, so to avoid needlessly saving
+ * callee-saved registers, we take the CPUID in x0 and return the physical
+ * proc_info pointer in x0 as well.
+ */
+__lookup_processor_type:
+	adr	x1, __lookup_processor_type_data
+	ldr	x2, [x1]
+	ldp	x3, x4, [x1, #8]
+	sub	x1, x1, x2			// get offset between virt&phys
+	add	x3, x3, x1			// convert virt addresses to
+	add	x4, x4, x1			// physical address space
+1:
+	ldp	w5, w6, [x3]			// load cpu_val and cpu_mask
+	and	x6, x6, x0
+	cmp	x5, x6
+	b.eq	2f
+	add	x3, x3, #PROC_INFO_SZ
+	cmp	x4, x4
+	b.ne	1b
+	mov	x3, #0				// unknown processor
+2:
+	mov	x0, x3
+	ret
+ENDPROC(__lookup_processor_type)
+
+/*
+ * This provides a C-API version of the above function.
+ */
+ENTRY(lookup_processor_type)
+	mov	x8, lr
+	bl	__lookup_processor_type
+	ret	x8
+ENDPROC(lookup_processor_type)
+
+	.align	3
+	.type	__lookup_processor_type_data, %object
+__lookup_processor_type_data:
+	.quad	.
+	.quad	__proc_info_begin
+	.quad	__proc_info_end
+	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
+
+/*
+ * Determine validity of the x21 FDT pointer.
+ * The dtb must be 8-byte aligned and live in the first 512M of memory.
+ */
+__vet_fdt:
+	tst	x21, #0x7
+	b.ne	1f
+	cmp	x21, x24
+	b.lt	1f
+	mov	x0, #(1 << 29)
+	add	x0, x0, x24
+	cmp	x21, x0
+	b.ge	1f
+	ret
+1:
+	mov	x21, #0
+	ret
+ENDPROC(__vet_fdt)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
new file mode 100644
index 0000000..f25186f
--- /dev/null
+++ b/arch/arm64/kernel/setup.c
@@ -0,0 +1,357 @@ 
+/*
+ * Based on arch/arm/kernel/setup.c
+ *
+ * Copyright (C) 1995-2001 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/initrd.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/screen_info.h>
+#include <linux/init.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+
+#include <asm/cputype.h>
+#include <asm/elf.h>
+#include <asm/procinfo.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/traps.h>
+#include <asm/memblock.h>
+
+extern void paging_init(void);
+
+unsigned int processor_id;
+EXPORT_SYMBOL(processor_id);
+
+unsigned int elf_hwcap __read_mostly;
+EXPORT_SYMBOL(elf_hwcap);
+
+static const char *cpu_name;
+static const char *machine_name;
+phys_addr_t __fdt_pointer __initdata;
+
+/*
+ * Standard memory resources
+ */
+static struct resource mem_res[] = {
+	{
+		.name = "Kernel code",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	},
+	{
+		.name = "Kernel data",
+		.start = 0,
+		.end = 0,
+		.flags = IORESOURCE_MEM
+	}
+};
+
+#define kernel_code mem_res[0]
+#define kernel_data mem_res[1]
+
+/*
+ * These functions re-use the assembly code in head.S, which
+ * already provide the required functionality.
+ */
+extern struct proc_info_list *lookup_processor_type(unsigned int);
+
+void __init early_print(const char *str, ...)
+{
+	char buf[256];
+	va_list ap;
+
+	va_start(ap, str);
+	vsnprintf(buf, sizeof(buf), str, ap);
+	va_end(ap);
+
+	printk("%s", buf);
+}
+
+static void __init setup_processor(void)
+{
+	struct proc_info_list *list;
+
+	/*
+	 * locate processor in the list of supported processor
+	 * types.  The linker builds this table for us from the
+	 * entries in arch/arm/mm/proc.S
+	 */
+	list = lookup_processor_type(read_cpuid_id());
+	if (!list) {
+		printk("CPU configuration botched (ID %08x), unable to continue.\n",
+		       read_cpuid_id());
+		while (1);
+	}
+
+	cpu_name = list->cpu_name;
+
+	printk("CPU: %s [%08x] revision %d\n",
+	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
+
+	sprintf(init_utsname()->machine, "aarch64");
+	elf_hwcap = 0;
+
+	cpu_proc_init();
+}
+
+static void __init setup_machine_fdt(phys_addr_t dt_phys)
+{
+	struct boot_param_header *devtree;
+	unsigned long dt_root;
+
+	/* Check we have a non-NULL DT pointer */
+	if (!dt_phys) {
+		early_print("\n"
+			"Error: NULL or invalid device tree blob\n"
+			"The dtb must be 8-byte aligned and passed in the first 512MB of memory\n"
+			"\nPlease check your bootloader.\n");
+
+		while (true)
+			cpu_relax();
+
+	}
+
+	devtree = phys_to_virt(dt_phys);
+
+	/* Check device tree validity */
+	if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) {
+		early_print("\n"
+			"Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n"
+			"Expected 0x%x, found 0x%x\n"
+			"\nPlease check your bootloader.\n",
+			dt_phys, devtree, OF_DT_HEADER,
+			be32_to_cpu(devtree->magic));
+
+		while (true)
+			cpu_relax();
+	}
+
+	initial_boot_params = devtree;
+	dt_root = of_get_flat_dt_root();
+
+	machine_name = of_get_flat_dt_prop(dt_root, "model", NULL);
+	if (!machine_name)
+		machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+	if (!machine_name)
+		machine_name = "<unknown>";
+	pr_info("Machine: %s\n", machine_name);
+
+	/* Retrieve various information from the /chosen node */
+	of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+	/* Initialize {size,address}-cells info */
+	of_scan_flat_dt(early_init_dt_scan_root, NULL);
+	/* Setup memory, calling early_init_dt_add_memory_arch */
+	of_scan_flat_dt(early_init_dt_scan_memory, NULL);
+}
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+	size &= PAGE_MASK;
+	memblock_add(base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return __va(memblock_alloc(size, align));
+}
+
+/*
+ * Limit the memory size that was specified via FDT.
+ */
+static int __init early_mem(char *p)
+{
+	phys_addr_t limit;
+
+	if (!p)
+		return 1;
+
+	limit = memparse(p, &p) & PAGE_MASK;
+	pr_notice("Memory limited to %lldMB\n", limit >> 20);
+
+	memblock_enforce_memory_limit(limit);
+
+	return 0;
+}
+early_param("mem", early_mem);
+
+static void __init request_standard_resources(void)
+{
+	struct memblock_region *region;
+	struct resource *res;
+
+	kernel_code.start   = virt_to_phys(_text);
+	kernel_code.end     = virt_to_phys(_etext - 1);
+	kernel_data.start   = virt_to_phys(_sdata);
+	kernel_data.end     = virt_to_phys(_end - 1);
+
+	for_each_memblock(memory, region) {
+		res = alloc_bootmem_low(sizeof(*res));
+		res->name  = "System RAM";
+		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+		request_resource(&iomem_resource, res);
+
+		if (kernel_code.start >= res->start &&
+		    kernel_code.end <= res->end)
+			request_resource(res, &kernel_code);
+		if (kernel_data.start >= res->start &&
+		    kernel_data.end <= res->end)
+			request_resource(res, &kernel_data);
+	}
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	setup_processor();
+
+	setup_machine_fdt(__fdt_pointer);
+
+	init_mm.start_code = (unsigned long) _text;
+	init_mm.end_code   = (unsigned long) _etext;
+	init_mm.end_data   = (unsigned long) _edata;
+	init_mm.brk	   = (unsigned long) _end;
+
+	*cmdline_p = boot_command_line;
+
+	parse_early_param();
+
+	arm64_memblock_init();
+
+	paging_init();
+	request_standard_resources();
+
+	unflatten_device_tree();
+
+#ifdef CONFIG_SMP
+	smp_init_cpus();
+#endif
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+	conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+	conswitchp = &dummy_con;
+#endif
+#endif
+}
+
+static DEFINE_PER_CPU(struct cpu, cpu_data);
+
+static int __init topology_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct cpu *cpu = &per_cpu(cpu_data, i);
+		cpu->hotpluggable = 1;
+		register_cpu(cpu, i);
+	}
+
+	return 0;
+}
+subsys_initcall(topology_init);
+
+static const char *hwcap_str[] = {
+	"fp",
+	"asimd",
+	NULL
+};
+
+static int c_show(struct seq_file *m, void *v)
+{
+	int i;
+
+	seq_printf(m, "Processor\t: %s rev %d (%s)\n",
+		   cpu_name, read_cpuid_id() & 15, ELF_PLATFORM);
+
+	for_each_online_cpu(i) {
+		/*
+		 * glibc reads /proc/cpuinfo to determine the number of
+		 * online processors, looking for lines beginning with
+		 * "processor".  Give glibc what it expects.
+		 */
+#ifdef CONFIG_SMP
+		seq_printf(m, "processor\t: %d\n", i);
+#endif
+		seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n",
+			   loops_per_jiffy / (500000UL/HZ),
+			   loops_per_jiffy / (5000UL/HZ) % 100);
+	}
+
+	/* dump out the processor features */
+	seq_puts(m, "Features\t: ");
+
+	for (i = 0; hwcap_str[i]; i++)
+		if (elf_hwcap & (1 << i))
+			seq_printf(m, "%s ", hwcap_str[i]);
+
+	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24);
+	seq_printf(m, "CPU architecture: AArch64\n");
+	seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15);
+	seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff);
+	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15);
+
+	seq_puts(m, "\n");
+
+	seq_printf(m, "Hardware\t: %s\n", machine_name);
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};