diff mbox

[kvm-unit-tests,RFC,10/10] x86: vm: Redefine virtual memory layout

Message ID 1483288652-18983-11-git-send-email-agordeev@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexander Gordeev Jan. 1, 2017, 4:37 p.m. UTC
Currently, virt_to_phys() and phys_to_virt() return the passed
address unchanged, because an identity (1:1) mapping is used.

With this change, the virtual memory layout is redefined
as three regions, as follows (PO stands for PAGE_OFFSET):

                0..&edata           code, stack, data
               PO..<PO + RAM size>  dynamic RAM pages
  <PO + RAM size>..<VA limit>       MMIO, virtual mappings

As a result, the virt_to_phys() and phys_to_virt() functions
apply PAGE_OFFSET to the passed addresses.

Furthermore, the 1:1 mapping at 3GB of physical memory is removed;
memory-mapped hardware devices should be ioremap()-ed and
accessed using MMIO accessors.

Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
---
 lib/x86/asm/io.h   | 12 ----------
 lib/x86/asm/page.h |  5 ++++
 lib/x86/vm.c       | 70 +++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 51 insertions(+), 36 deletions(-)
diff mbox

Patch

diff --git a/lib/x86/asm/io.h b/lib/x86/asm/io.h
index 35a5c7347411..4a6447448a77 100644
--- a/lib/x86/asm/io.h
+++ b/lib/x86/asm/io.h
@@ -45,18 +45,6 @@  static inline void outl(uint32_t value, unsigned long port)
     asm volatile("outl %0, %w1" : : "a"(value), "Nd"((unsigned short)port));
 }
 
-#define virt_to_phys virt_to_phys
-static inline unsigned long virt_to_phys(const void *virt)
-{
-    return (unsigned long)virt;
-}
-
-#define phys_to_virt phys_to_virt
-static inline void *phys_to_virt(unsigned long phys)
-{
-    return (void *)phys;
-}
-
 #define ioremap ioremap
 void __iomem *ioremap(phys_addr_t phys_addr, size_t size);
 
diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index c43bab28ca2e..8e7002776c7a 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -10,6 +10,8 @@ 
 #include <linux/const.h>
 #include <bitops.h>
 
+#define PAGE_OFFSET	0x40000000
+
 #define PAGE_SHIFT	12
 #define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
@@ -44,5 +46,8 @@ 
 #define PGDIR_BITS(lvl)        (((lvl) - 1) * PGDIR_WIDTH + PAGE_SHIFT)
 #define PGDIR_OFFSET(va, lvl)  (((va) >> PGDIR_BITS(lvl)) & PGDIR_MASK)
 
+#define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET))
+#define __pa(x)		((unsigned long)(x) - PAGE_OFFSET)
+
 #endif /* !__ASSEMBLY__ */
 #endif
diff --git a/lib/x86/vm.c b/lib/x86/vm.c
index 85625e5c934e..994c69df94d0 100644
--- a/lib/x86/vm.c
+++ b/lib/x86/vm.c
@@ -2,12 +2,18 @@ 
 #include "vm.h"
 #include "libcflat.h"
 #include "apic.h"
+#include "asm/page.h"
 
-static void *free = 0;
+extern char edata;
+static void *free = &edata;
 static void *vfree_top = 0;
+static unsigned long end_of_memory;
+static int pg_on = 0;
 
 static void free_memory(void *mem, unsigned long size)
 {
+    free = NULL;
+
     while (size >= PAGE_SIZE) {
 	*(void **)mem = free;
 	free = mem;
@@ -35,8 +41,29 @@  void free_page(void *page)
     free = page;
 }
 
-extern char edata;
-static unsigned long end_of_memory;
+static void *alloc_page_no_pg()
+{
+    void *p = free;
+
+    free += PAGE_SIZE;
+
+    return p;
+}
+
+static void *__alloc_page_table()
+{
+    return pg_on ? alloc_page() : alloc_page_no_pg();
+}
+
+static inline unsigned long __virt_to_phys(void *virt)
+{
+    return pg_on ? virt_to_phys(virt) : (unsigned long)virt;
+}
+
+static inline void *__phys_to_virt(unsigned long phys)
+{
+    return pg_on ? phys_to_virt(phys) : (void *)phys;
+}
 
 unsigned long *install_pte(unsigned long *cr3,
 			   int pte_level,
@@ -50,11 +77,11 @@  unsigned long *install_pte(unsigned long *cr3,
     for (level = PAGE_LEVEL; level > pte_level; --level) {
 	offset = PGDIR_OFFSET((unsigned long)virt, level);
 	if (!(pt[offset] & PT_PRESENT_MASK)) {
-	    unsigned long *new_pt = alloc_page();
+	    unsigned long *new_pt = __alloc_page_table();
 	    memset(new_pt, 0, PAGE_SIZE);
-	    pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+	    pt[offset] = __virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 	}
-	pt = phys_to_virt(pt[offset] & PT_ADDR_MASK);
+	pt = __phys_to_virt(pt[offset] & PT_ADDR_MASK);
     }
     offset = PGDIR_OFFSET((unsigned long)virt, level);
     pt[offset] = pte;
@@ -84,8 +111,7 @@  unsigned long *install_large_page(unsigned long *cr3,
 				  unsigned long phys,
 				  void *virt)
 {
-    return install_pte(cr3, 2, virt,
-		       phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK);
+    return install_pte(cr3, 2, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK);
 }
 
 unsigned long *install_page(unsigned long *cr3,
@@ -121,26 +147,19 @@  static void setup_mmu_range(unsigned long *cr3, unsigned long start, void *virt,
 	}
 }
 
+#define MAX_PT_NR	(2048 + 4)
+#define PT_START	((unsigned long)&edata)
+#define PT_END		(PT_START + (MAX_PT_NR * PAGE_SIZE))
+
 static void setup_mmu(unsigned long len)
 {
-    unsigned long *cr3 = alloc_page();
+    unsigned long *cr3 = alloc_page_no_pg();
 
     memset(cr3, 0, PAGE_SIZE);
 
-#ifdef __x86_64__
-    if (len < (1ul << 32))
-        len = (1ul << 32);  /* map mmio 1:1 */
-
-    setup_mmu_range(cr3, 0, (void *)0, len);
-#else
-    if (len > (1ul << 31))
-	    len = (1ul << 31);
-
-    /* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */
-    setup_mmu_range(cr3, 0, (void *)0, len);
-    setup_mmu_range(cr3, 3ul << 30, (void *)(3ul << 30), (1ul << 30));
-    vfree_top = (void*)(3ul << 30);
-#endif
+    assert(len >= PT_END);
+    setup_mmu_range(cr3, 0, (void *)0, PT_START);
+    setup_mmu_range(cr3, PT_START, phys_to_virt(PT_START), len - PT_START);
 
     write_cr3((unsigned long)cr3);
 #ifndef __x86_64__
@@ -148,6 +167,8 @@  static void setup_mmu(unsigned long len)
 #endif
     write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP);
 
+    pg_on = 1;
+
     printf("paging enabled\n");
     printf("cr0 = %lx\n", read_cr0());
     printf("cr3 = %lx\n", read_cr3());
@@ -158,8 +179,9 @@  void setup_vm()
 {
     assert(!end_of_memory);
     end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE);
-    free_memory(&edata, end_of_memory - (unsigned long)&edata);
+    end_of_memory -= 0x20 * PAGE_SIZE;	/* s3 ACPI tables hack */
     setup_mmu(end_of_memory);
+    free_memory(phys_to_virt(PT_END), end_of_memory - PT_END);
     ioremap_apic();
 }