
[07/12] arm64: mm: Place kImage at bottom of VA space

Message ID 20171204141313.31604-8-steve.capper@arm.com (mailing list archive)
State New, archived

Commit Message

Steve Capper Dec. 4, 2017, 2:13 p.m. UTC
Re-arrange the kernel memory map s.t. the kernel image resides in the
bottom 514MB of memory, with the modules, fixed map and PCI IO space
placed above it. At the very bottom of the memory map we set aside a
2MB guard region to prevent ambiguity with PTR_ERR/ERR_PTR.
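
A minimal userspace sketch (constants mirroring include/linux/err.h; not
part of the patch) of the ambiguity the guard prevents: ERR_PTR() encodes
small negative errno values as pointers in the last few KB of the address
space, so no valid kernel mapping may live there. The 2MB guard size is
presumably chosen for section granularity rather than the errno range
itself.

#include <stdio.h>

#define MAX_ERRNO       4095UL

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
        void *p = ERR_PTR(-12); /* -ENOMEM */

        /* Encodes to 0xfffffffffffffff4, i.e. inside the guard region,
         * which must therefore never contain a valid kernel pointer. */
        printf("%p IS_ERR=%d err=%ld\n", p, IS_ERR(p), PTR_ERR(p));
        return 0;
}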

Dynamically resizable objects such as KASAN shadow and sparsemem map
are placed above the fixed size objects.

This means that kernel addresses are now no longer directly dependent on
VA space size.
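
For reference, the fixed-size part of the new layout can be computed with
a throwaway userspace program reusing the macro definitions from the patch
below (assuming the 16MB PCI_IO_SIZE from the existing arm64 headers):

#include <stdio.h>

#define SZ_2M           0x200000UL
#define SZ_128M         (128UL << 20)
#define SZ_512M         (512UL << 20)
#define PCI_IO_SIZE     (16UL << 20)    /* SZ_16M in the arm64 headers */

#define KIMAGE_VSIZE    (SZ_512M)
#define KIMAGE_VADDR    (0UL - SZ_2M - KIMAGE_VSIZE)
#define MODULES_VSIZE   (SZ_128M)
#define MODULES_END     (KIMAGE_VADDR)
#define MODULES_VADDR   (MODULES_END - MODULES_VSIZE)
#define PCI_IO_END      (MODULES_VADDR - SZ_2M)
#define PCI_IO_START    (PCI_IO_END - PCI_IO_SIZE)

int main(void)
{
        /* FIXADDR_TOP and VMEMMAP_START depend on PGDIR_SIZE and
         * VMEMMAP_SIZE, so only the fixed-size regions are shown. */
        printf("kimage : %#lx - %#lx\n", KIMAGE_VADDR,
               KIMAGE_VADDR + KIMAGE_VSIZE - 1);
        printf("modules: %#lx - %#lx\n", MODULES_VADDR, MODULES_END - 1);
        printf("pci io : %#lx - %#lx\n", PCI_IO_START, PCI_IO_END - 1);
        return 0;
}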

Signed-off-by: Steve Capper <steve.capper@arm.com>
---
 arch/arm64/include/asm/memory.h  | 17 +++++++++--------
 arch/arm64/include/asm/pgtable.h |  4 ++--
 arch/arm64/mm/dump.c             | 12 +++++++-----
 3 files changed, 18 insertions(+), 15 deletions(-)

Comments

Ard Biesheuvel Dec. 4, 2017, 4:25 p.m. UTC | #1
On 4 December 2017 at 14:13, Steve Capper <steve.capper@arm.com> wrote:
> Re-arrange the kernel memory map s.t. the kernel image resides in the
> bottom 514MB of memory.

I guess this breaks KASLR entirely, no? Given that it adds an offset
in the range [0 ... sizeof(VMALLOC_SPACE) /4 ].

In any case, it makes sense to keep the kernel VA space adjacent to
the VMALLOC space, rather than put stuff like PCI I/O and the fixmap
in between.
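
To illustrate with a made-up offset (the real masking logic lives in
arch/arm64/kernel/kaslr.c): starting only 514MB below the top of the VA
space, almost any randomization offset added to the default base runs
past the guard region:

#include <stdio.h>

#define SZ_2M           0x200000UL
#define KIMAGE_VSIZE    (512UL << 20)
#define KIMAGE_VADDR    (0UL - SZ_2M - KIMAGE_VSIZE)

int main(void)
{
        unsigned long offset = 1UL << 30;  /* hypothetical 1GB offset */

        /* The randomized base wraps past the top of the address space. */
        printf("default base    : %#lx\n", KIMAGE_VADDR);
        printf("randomized base : %#lx\n", KIMAGE_VADDR + offset);
        return 0;
}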

> With the modules, fixed map, PCI IO space placed
> above it. At the very bottom of the memory map we set aside a 2MB guard
> region to prevent ambiguity with PTR_ERR/ERR_PTR.
>

Interesting. In another thread, we discussed whether it is necessary
to prevent the linear map randomization code from allocating at the
very top [bottom in Steve-speak] of the kernel virtual address space,
and this is a thing I did not consider.

> Dynamically resizable objects such as KASAN shadow and sparsemem map
> are placed above the fixed size objects.
>

The current placement of the sparsemem map was carefully chosen so
that virt_to_page/page_to_virt translations are extremely cheap. Is
that still the case?
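
(For reference, a userspace model of the patched translations — with
sizeof(struct page) assumed to be 64 bytes and a made-up VMEMMAP_START —
suggests both directions remain a handful of arithmetic operations on
link-time constants, with '+'/'-' replacing the '|'/'& ~' that relied on
VMEMMAP_START's alignment:)

#include <stdio.h>

#define PAGE_SIZE        4096UL
#define STRUCT_PAGE_SIZE 64UL                   /* assumed sizeof(struct page) */
#define PAGE_OFFSET      0xffff000000000000UL   /* 48-bit VA linear map base */
#define VMEMMAP_START    0xfffffdffffe00000UL   /* hypothetical placement */

static unsigned long virt_to_page(unsigned long vaddr)
{
        /* __virt_to_pgoff() from the patch, then '+' instead of '|'. */
        return (vaddr & ~PAGE_OFFSET) / PAGE_SIZE * STRUCT_PAGE_SIZE
                + VMEMMAP_START;
}

static unsigned long page_to_virt(unsigned long page)
{
        /* '-' instead of '& ~', as in the patched __page_to_voff(). */
        return (page - VMEMMAP_START) * PAGE_SIZE / STRUCT_PAGE_SIZE
                | PAGE_OFFSET;
}

int main(void)
{
        unsigned long v = PAGE_OFFSET + 0x1234000;

        printf("virt %#lx -> page %#lx -> virt %#lx\n",
               v, virt_to_page(v), page_to_virt(virt_to_page(v)));
        return 0;
}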

> This means that kernel addresses are now no longer directly dependent on
> VA space size.
>
> Signed-off-by: Steve Capper <steve.capper@arm.com>
> ---
>  arch/arm64/include/asm/memory.h  | 17 +++++++++--------
>  arch/arm64/include/asm/pgtable.h |  4 ++--
>  arch/arm64/mm/dump.c             | 12 +++++++-----
>  3 files changed, 18 insertions(+), 15 deletions(-)
>
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index 0a912eb3d74f..ba80561c6ed8 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -68,14 +68,15 @@
>  #define PAGE_OFFSET            (UL(0xffffffffffffffff) - \
>         (UL(1) << VA_BITS) + 1)
>  #define PAGE_OFFSET_END                (VA_START)
> -#define KIMAGE_VADDR           (MODULES_END)
> -#define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
> -#define MODULES_VADDR          (VA_START + KASAN_SHADOW_SIZE)
> +#define KIMAGE_VSIZE           (SZ_512M)
> +#define KIMAGE_VADDR           (UL(0) - SZ_2M - KIMAGE_VSIZE)
>  #define MODULES_VSIZE          (SZ_128M)
> -#define VMEMMAP_START          (-VMEMMAP_SIZE)
> -#define PCI_IO_END             (VMEMMAP_START - SZ_2M)
> +#define MODULES_END            (KIMAGE_VADDR)
> +#define MODULES_VADDR          (MODULES_END - MODULES_VSIZE)
> +#define PCI_IO_END             (MODULES_VADDR - SZ_2M)
>  #define PCI_IO_START           (PCI_IO_END - PCI_IO_SIZE)
> -#define FIXADDR_TOP            (PCI_IO_START - SZ_2M)
> +#define FIXADDR_TOP            (PCI_IO_START - PGDIR_SIZE)
> +#define VMEMMAP_START          (FIXADDR_START - VMEMMAP_SIZE)
>
>  #define KERNEL_START      _text
>  #define KERNEL_END        _end
> @@ -292,10 +293,10 @@ static inline void *phys_to_virt(phys_addr_t x)
>  #define _virt_addr_valid(kaddr)        pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
>  #else
>  #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
> -#define __page_to_voff(kaddr)  (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
> +#define __page_to_voff(kaddr)  (((u64)(kaddr) - VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
>
>  #define page_to_virt(page)     ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
> -#define virt_to_page(vaddr)    ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
> +#define virt_to_page(vaddr)    ((struct page *)((__virt_to_pgoff(vaddr)) + VMEMMAP_START))
>
>  #define _virt_addr_valid(kaddr)        pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
>                                            + PHYS_OFFSET) >> PAGE_SHIFT)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 054b37143a50..e8b4dcc11fed 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -30,8 +30,8 @@
>   * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
>   *     and fixed mappings
>   */
> -#define VMALLOC_START          (MODULES_END)
> -#define VMALLOC_END            (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
> +#define VMALLOC_START          (VA_START + KASAN_SHADOW_SIZE)
> +#define VMALLOC_END            (FIXADDR_TOP - PUD_SIZE)
>
>  #define vmemmap                        ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
>
> diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
> index b7b09c0fc50d..e5d1b5f432fe 100644
> --- a/arch/arm64/mm/dump.c
> +++ b/arch/arm64/mm/dump.c
> @@ -36,17 +36,19 @@ static const struct addr_marker address_markers[] = {
>         { KASAN_SHADOW_START,           "Kasan shadow start" },
>         { KASAN_SHADOW_END,             "Kasan shadow end" },
>  #endif
> -       { MODULES_VADDR,                "Modules start" },
> -       { MODULES_END,                  "Modules end" },
>         { VMALLOC_START,                "vmalloc() Area" },
>         { VMALLOC_END,                  "vmalloc() End" },
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +       { VMEMMAP_START,                "vmemmap start" },
> +       { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end"},
> +#endif
>         { FIXADDR_START,                "Fixmap start" },
>         { FIXADDR_TOP,                  "Fixmap end" },
>         { PCI_IO_START,                 "PCI I/O start" },
>         { PCI_IO_END,                   "PCI I/O end" },
> -#ifdef CONFIG_SPARSEMEM_VMEMMAP
> -       { VMEMMAP_START,                "vmemmap" },
> -#endif
> +       { MODULES_VADDR,                "Modules start" },
> +       { MODULES_END,                  "Modules end" },
> +       { KIMAGE_VADDR,                 "kImage start"},
>         { -1,                           NULL },
>  };
>
> --
> 2.11.0
>
Steve Capper Dec. 4, 2017, 5:18 p.m. UTC | #2
Hi Ard,

On Mon, Dec 04, 2017 at 04:25:18PM +0000, Ard Biesheuvel wrote:
> On 4 December 2017 at 14:13, Steve Capper <steve.capper@arm.com> wrote:
> > Re-arrange the kernel memory map s.t. the kernel image resides in the
> > bottom 514MB of memory.
>
> I guess this breaks KASLR entirely, no? Given that it adds an offset
> in the range [0 ... sizeof(VMALLOC_SPACE) /4 ].

Yes, yes it does. Sorry about this. I had very carefully tested KASLR
with custom offsets... on my early page table code. I will have a think
about this.

From a KASLR side, my (renewed) understanding is that a virtual address
as low as possible is desired for the kimage start as that affords the
most wiggle room?

>
> In any case, it makes sense to keep the kernel VA space adjacent to
> the VMALLOC space, rather than put stuff like PCI I/O and the fixmap
> in between.
>
> > With the modules, fixed map, PCI IO space placed
> > above it. At the very bottom of the memory map we set aside a 2MB guard
> > region to prevent ambiguity with PTR_ERR/ERR_PTR.
> >
>
> Interesting. In another thread, we discussed whether it is necessary
> to prevent the linear map randomization code from allocating at the
> very top [bottom in Steve-speak] of the kernel virtual address space,
> and this is a thing I did not consider.
>

I'll adjust my nomenclature to be less confusing.

> > Dynamically resizable objects such as KASAN shadow and sparsemem map
> > are placed above the fixed size objects.
> >
>
> The current placement of the sparsemem map was carefully chosen so
> that virt_to_page/page_to_virt translations are extremely cheap. Is
> that still the case?

I will double check the virt_to_page/page_to_virt. I had adjusted virt_to_phys
and phys_to_virt and I think this one escaped me.

Cheers,
--
Steve
IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Steve Capper Dec. 4, 2017, 5:21 p.m. UTC | #3
On Mon, Dec 04, 2017 at 05:18:09PM +0000, Steve Capper wrote:
> Hi Ard,
> 

[...]

> IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

Apologies for this email disclaimer, it was sent erroneously
and can be ignored.
Ard Biesheuvel Dec. 4, 2017, 5:27 p.m. UTC | #4
On 4 December 2017 at 17:18, Steve Capper <steve.capper@arm.com> wrote:
> Hi Ard,
>
> On Mon, Dec 04, 2017 at 04:25:18PM +0000, Ard Biesheuvel wrote:
>> On 4 December 2017 at 14:13, Steve Capper <steve.capper@arm.com> wrote:
>> > Re-arrange the kernel memory map s.t. the kernel image resides in the
>> > bottom 514MB of memory.
>>
>> I guess this breaks KASLR entirely, no? Given that it adds an offset
>> in the range [0 ... sizeof(VMALLOC_SPACE) /4 ].
>
> Yes, yes it does. Sorry about this. I had very carefully tested KASLR
> with custom offsets... on my early page table code. I will have a think
> about this.
>
> From a KASLR side, my (renewed) understanding is that a virtual address
> as low as possible is desired for the kimage start as that affords the
> most wiggle room?
>

Well, the nice thing about the current arrangement is that the default
is adjacent to the vmalloc space so any non-zero [bounded] offset
produces a valid placement. Addition with subtraction is easy, so
which side the default placement happens to be at does not really
matter. Having to implement additional bounds checking in the early
KASLR init code to stay clear of the PCI I/O or fixmap regions sounds
a bit more cumbersome.
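
Concretely, each candidate base would need hypothetical exclusion tests
along these lines (region macros as in the patch), rather than today's
single clamp into the vmalloc area:

#include <stdio.h>

#define SZ_2M           0x200000UL
#define KIMAGE_VSIZE    (512UL << 20)
#define KIMAGE_VADDR    (0UL - SZ_2M - KIMAGE_VSIZE)
#define MODULES_VADDR   (KIMAGE_VADDR - (128UL << 20))
#define PCI_IO_END      (MODULES_VADDR - SZ_2M)
#define PCI_IO_START    (PCI_IO_END - (16UL << 20))

/* Hypothetical early-KASLR helper: reject randomized image bases that
 * would overlap the PCI I/O window now sitting below the default base
 * (the fixmap would need the same treatment). */
static int base_overlaps_pci_io(unsigned long base)
{
        return base < PCI_IO_END && base + KIMAGE_VSIZE > PCI_IO_START;
}

int main(void)
{
        unsigned long bad = PCI_IO_START - KIMAGE_VSIZE / 2;

        printf("overlaps PCI I/O: %d\n", base_overlaps_pci_io(bad));
        return 0;
}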

>>
>> In any case, it makes sense to keep the kernel VA space adjacent to
>> the VMALLOC space, rather than put stuff like PCI I/O and the fixmap
>> in between.
>>
>> > With the modules, fixed map, PCI IO space placed
>> > above it. At the very bottom of the memory map we set aside a 2MB guard
>> > region to prevent ambiguity with PTR_ERR/ERR_PTR.
>> >
>>
>> Interesting. In another thread, we discussed whether it is necessary
>> to prevent the linear map randomization code from allocating at the
>> very top [bottom in Steve-speak] of the kernel virtual address space,
>> and this is a thing I did not consider.
>>
>
> I'll adjust my nomenclature to be less confusing.
>

Thanks :-)

>> > Dynamically resizable objects such as KASAN shadow and sparsemem map
>> > are placed above the fixed size objects.
>> >
>>
>> The current placement of the sparsemem map was carefully chosen so
>> that virt_to_page/page_to_virt translations are extremely cheap. Is
>> that still the case?
>
> I will double check the virt_to_page/page_to_virt. I had adjusted virt_to_phys
> and phys_to_virt and I think this one escaped me.
>
> Cheers,
> --
> Steve
Steve Capper Dec. 4, 2017, 6:12 p.m. UTC | #5
On Mon, Dec 04, 2017 at 05:27:10PM +0000, Ard Biesheuvel wrote:
> On 4 December 2017 at 17:18, Steve Capper <steve.capper@arm.com> wrote:
> > Hi Ard,
> >
> > On Mon, Dec 04, 2017 at 04:25:18PM +0000, Ard Biesheuvel wrote:
> >> On 4 December 2017 at 14:13, Steve Capper <steve.capper@arm.com> wrote:
> >> > Re-arrange the kernel memory map s.t. the kernel image resides in the
> >> > bottom 514MB of memory.
> >>
> >> I guess this breaks KASLR entirely, no? Given that it adds an offset
> >> in the range [0 ... sizeof(VMALLOC_SPACE) /4 ].
> >
> > Yes, yes it does. Sorry about this. I had very carefully tested KASLR
> > with custom offsets... on my early page table code. I will have a think
> > about this.
> >
> > From a KASLR side, my (renewed) understanding is that a virtual address
> > as low as possible is desired for the kimage start as that affords the
> > most wiggle room?
> >
> 
> Well, the nice thing about the current arrangement is that the default
> is adjacent to the vmalloc space so any non-zero [bounded] offset
> produces a valid placement. Addition with subtraction is easy, so
> which side the default placement happens to be at does not really
> matter. Having to implement additional bounds checking in the early
> KASLR init code to stay clear of the PCI I/O or fixmap regions sounds
> a bit more cumbersome.
> 

I *think* I can fix KASAN_SHADOW_END to be 0xFFFF200000000000 on both 48-bit
and 52-bit VA configurations. Thus I may be able to enable 52-bit VA with
minimal disruption to the layout of the VA space (i.e. no need to change
the layout) if I also depend on CONFIG_RELOCATABLE.

I'll investigate...

Cheers,
--
Steve
Steve Capper Dec. 12, 2017, 11:03 a.m. UTC | #6
On Mon, Dec 04, 2017 at 06:12:13PM +0000, Steve Capper wrote:
> On Mon, Dec 04, 2017 at 05:27:10PM +0000, Ard Biesheuvel wrote:
> > On 4 December 2017 at 17:18, Steve Capper <steve.capper@arm.com> wrote:
> > > Hi Ard,
> > >
> > > On Mon, Dec 04, 2017 at 04:25:18PM +0000, Ard Biesheuvel wrote:
> > >> On 4 December 2017 at 14:13, Steve Capper <steve.capper@arm.com> wrote:
> > >> > Re-arrange the kernel memory map s.t. the kernel image resides in the
> > >> > bottom 514MB of memory.
> > >>
> > >> I guess this breaks KASLR entirely, no? Given that it adds an offset
> > >> in the range [0 ... sizeof(VMALLOC_SPACE) /4 ].
> > >
> > > Yes, yes it does. Sorry about this. I had very carefully tested KASLR
> > > with custom offsets... on my early page table code. I will have a think
> > > about this.
> > >
> > > From a KASLR side, my (renewed) understanding is that a virtual address
> > > as low as possible is desired for the kimage start as that affords the
> > > most wiggle room?
> > >
> > 
> > Well, the nice thing about the current arrangement is that the default
> > is adjacent to the vmalloc space so any non-zero [bounded] offset
> > produces a valid placement. Addition with subtraction is easy, so
> > which side the default placement happens to be at does not really
> > matter. Having to implement additional bounds checking in the early
> > KASLR init code to stay clear of the PCI I/O or fixmap regions sounds
> > a bit more cumbersome.
> > 
> 
> I *think* I can fix KASAN_SHADOW_END to be 0xFFFF200000000000 on both 48-bit
> and 52-bit VA configurations. Thus I may be able to enable 52-bit VA with
> minimal disruption to the layout of the VA space (i.e. no need to change
> the layout) if I also depend on CONFIG_RELOCATABLE.
> 

Unfortunately, having KASAN_SHADOW_END at 0xFFFF200000000000 doesn't
work with 52-bit VAs as this would place it in the direct linear map
area.

So I think we need to flip the two halves of the kernel address space in
order to accommodate inline KASAN that operates under multiple VA space
sizes (I couldn't figure out any way to patch the inline KASAN instrumentation).
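
For context, a sketch of why inline instrumentation is rigid: the formula
matches the kernel's kasan_mem_to_shadow(), but the offset value below is
made up. The compiler emits this computation, constant included, at every
instrumented access, so it cannot be fixed up at boot:

#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_OFFSET     0xdfff200000000000UL    /* hypothetical */

/* One shadow byte covers an 8-byte granule; inline KASAN bakes this
 * computation into the generated code of every memory access check. */
static unsigned char *kasan_mem_to_shadow(const void *addr)
{
        return (unsigned char *)(((unsigned long)addr
                                  >> KASAN_SHADOW_SCALE_SHIFT)
                                 + KASAN_SHADOW_OFFSET);
}

int main(void)
{
        void *p = (void *)0xffff000012345678UL;

        printf("shadow byte for %p at %p\n", p,
               (void *)kasan_mem_to_shadow(p));
        return 0;
}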

Cheers,

Patch

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0a912eb3d74f..ba80561c6ed8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -68,14 +68,15 @@ 
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) - \
 	(UL(1) << VA_BITS) + 1)
 #define PAGE_OFFSET_END		(VA_START)
-#define KIMAGE_VADDR		(MODULES_END)
-#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
-#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
+#define KIMAGE_VSIZE		(SZ_512M)
+#define KIMAGE_VADDR		(UL(0) - SZ_2M - KIMAGE_VSIZE)
 #define MODULES_VSIZE		(SZ_128M)
-#define VMEMMAP_START		(-VMEMMAP_SIZE)
-#define PCI_IO_END		(VMEMMAP_START - SZ_2M)
+#define MODULES_END		(KIMAGE_VADDR)
+#define MODULES_VADDR		(MODULES_END - MODULES_VSIZE)
+#define PCI_IO_END		(MODULES_VADDR - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP		(PCI_IO_START - PGDIR_SIZE)
+#define VMEMMAP_START		(FIXADDR_START - VMEMMAP_SIZE)
 
 #define KERNEL_START      _text
 #define KERNEL_END        _end
@@ -292,10 +293,10 @@  static inline void *phys_to_virt(phys_addr_t x)
 #define _virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #else
 #define __virt_to_pgoff(kaddr)	(((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
-#define __page_to_voff(kaddr)	(((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
+#define __page_to_voff(kaddr)	(((u64)(kaddr) - VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
 
 #define page_to_virt(page)	((void *)((__page_to_voff(page)) | PAGE_OFFSET))
-#define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
+#define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) + VMEMMAP_START))
 
 #define _virt_addr_valid(kaddr)	pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
 					   + PHYS_OFFSET) >> PAGE_SHIFT)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 054b37143a50..e8b4dcc11fed 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -30,8 +30,8 @@ 
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
  *	and fixed mappings
  */
-#define VMALLOC_START		(MODULES_END)
-#define VMALLOC_END		(- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_START		(VA_START + KASAN_SHADOW_SIZE)
+#define VMALLOC_END		(FIXADDR_TOP - PUD_SIZE)
 
 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
 
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index b7b09c0fc50d..e5d1b5f432fe 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -36,17 +36,19 @@  static const struct addr_marker address_markers[] = {
 	{ KASAN_SHADOW_START,		"Kasan shadow start" },
 	{ KASAN_SHADOW_END,		"Kasan shadow end" },
 #endif
-	{ MODULES_VADDR,		"Modules start" },
-	{ MODULES_END,			"Modules end" },
 	{ VMALLOC_START,		"vmalloc() Area" },
 	{ VMALLOC_END,			"vmalloc() End" },
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	{ VMEMMAP_START,		"vmemmap start" },
+	{ VMEMMAP_START + VMEMMAP_SIZE,	"vmemmap end"},
+#endif
 	{ FIXADDR_START,		"Fixmap start" },
 	{ FIXADDR_TOP,			"Fixmap end" },
 	{ PCI_IO_START,			"PCI I/O start" },
 	{ PCI_IO_END,			"PCI I/O end" },
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-	{ VMEMMAP_START,		"vmemmap" },
-#endif
+	{ MODULES_VADDR,		"Modules start" },
+	{ MODULES_END,			"Modules end" },
+	{ KIMAGE_VADDR,			"kImage start"},
 	{ -1,				NULL },
 };