@@ -123,16 +123,25 @@ void arch_mm_preinit(void *p)
* table at offset in previous level MFN (pref_l_mfn). pt_pfn is a guest
* PFN.
*/
+/*
+ * Per-level page table protection bits. pt_prot[l - 1] is the protection
+ * for entries *inside* a level-l table (e.g. pt_prot[0] == L1_PROT for
+ * PTEs), while pt_prot[l] is the protection used when hooking a level-l
+ * table into its level-(l+1) parent.
+ */
+static pgentry_t pt_prot[PAGETABLE_LEVELS] = {
+    L1_PROT,
+    L2_PROT,
+    L3_PROT,
+#if defined(__x86_64__)
+    L4_PROT,
+#endif
+};
+
static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
unsigned long offset, unsigned long level)
{
- pgentry_t *tab = pt_base;
+ pgentry_t *tab;
unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
- pgentry_t prot_e, prot_t;
+#ifdef CONFIG_PARAVIRT
mmu_update_t mmu_updates[1];
int rc;
+#endif
- prot_e = prot_t = 0;
DEBUG("Allocating new L%d pt frame for pfn=%lx, "
"prev_l_mfn=%lx, offset=%lx",
level, *pt_pfn, prev_l_mfn, offset);
@@ -140,30 +149,12 @@ static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
/* We need to clear the page, otherwise we might fail to map it
as a page table page */
memset((void*) pt_page, 0, PAGE_SIZE);
-
- switch ( level )
- {
- case L1_FRAME:
- prot_e = L1_PROT;
- prot_t = L2_PROT;
- break;
- case L2_FRAME:
- prot_e = L2_PROT;
- prot_t = L3_PROT;
- break;
-#if defined(__x86_64__)
- case L3_FRAME:
- prot_e = L3_PROT;
- prot_t = L4_PROT;
- break;
-#endif
- default:
- printk("new_pt_frame() called with invalid level number %lu\n", level);
- do_exit();
- break;
- }
+    /*
+     * The new table is always hooked into a parent via pt_prot[level], so
+     * level must stay strictly below PAGETABLE_LEVELS (the top-level table
+     * is never allocated here); allowing equality would index one past the
+     * end of pt_prot[].
+     */
+    ASSERT(level >= 1 && level < PAGETABLE_LEVELS);
+
+#ifdef CONFIG_PARAVIRT
/* Make PFN a page table page */
+ tab = pt_base;
#if defined(__x86_64__)
tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
#endif
@@ -172,7 +163,7 @@ static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
sizeof(pgentry_t) * l1_table_offset(pt_page);
mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
- (prot_e & ~_PAGE_RW);
+ (pt_prot[level - 1] & ~_PAGE_RW);
if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
{
@@ -184,13 +175,18 @@ static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
/* Hook the new page table page into the hierarchy */
mmu_updates[0].ptr =
((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
- mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
+ mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
+ pt_prot[level];
if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
{
printk("ERROR: mmu_update failed with rc=%d\n", rc);
do_exit();
}
+#else
+ tab = mfn_to_virt(prev_l_mfn);
+ tab[offset] = (*pt_pfn << PAGE_SHIFT) | pt_prot[level];
+#endif
*pt_pfn += 1;
}
@@ -202,12 +198,14 @@ static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
unsigned long start_address, end_address;
unsigned long pfn_to_map, pt_pfn = *start_pfn;
- static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
pgentry_t *tab = pt_base, page;
unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(pt_base));
unsigned long offset;
+#ifdef CONFIG_PARAVIRT
+ static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
int count = 0;
int rc;
+#endif
/* Be conservative: even if we know there will be more pages already
mapped, start the loop at the very beginning. */
@@ -225,6 +223,10 @@ static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
((unsigned long)pfn_to_virt(*max_pfn) -
(unsigned long)&_text)>>20);
}
+#else
+ /* Round up to next 2MB boundary as we are using 2MB pages on HVMlite. */
+ pfn_to_map = (pfn_to_map + L1_PAGETABLE_ENTRIES - 1) &
+ ~(L1_PAGETABLE_ENTRIES - 1);
#endif
start_address = (unsigned long)pfn_to_virt(pfn_to_map);
@@ -257,6 +259,7 @@ static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
pt_mfn = pte_to_mfn(page);
tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT);
offset = l2_table_offset(start_address);
+#ifdef CONFIG_PARAVIRT
/* Need new L1 pt frame */
if ( !(tab[offset] & _PAGE_PRESENT) )
new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
@@ -288,6 +291,12 @@ static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
count = 0;
}
start_address += PAGE_SIZE;
+#else
+ if ( !(tab[offset] & _PAGE_PRESENT) )
+ tab[offset] = (pgentry_t)pfn_to_map << PAGE_SHIFT |
+ L2_PROT | _PAGE_PSE;
+ start_address += 1UL << L2_PAGETABLE_SHIFT;
+#endif
}
*start_pfn = pt_pfn;
@@ -302,16 +311,19 @@ static void set_readonly(void *text, void *etext)
unsigned long start_address =
((unsigned long) text + PAGE_SIZE - 1) & PAGE_MASK;
unsigned long end_address = (unsigned long) etext;
- static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
pgentry_t *tab = pt_base, page;
unsigned long mfn = pfn_to_mfn(virt_to_pfn(pt_base));
unsigned long offset;
+ unsigned long page_size = PAGE_SIZE;
+#ifdef CONFIG_PARAVIRT
+ static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
int count = 0;
int rc;
+#endif
printk("setting %p-%p readonly\n", text, etext);
- while ( start_address + PAGE_SIZE <= end_address )
+ while ( start_address + page_size <= end_address )
{
tab = pt_base;
mfn = pfn_to_mfn(virt_to_pfn(pt_base));
@@ -327,26 +339,34 @@ static void set_readonly(void *text, void *etext)
mfn = pte_to_mfn(page);
tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
offset = l2_table_offset(start_address);
- page = tab[offset];
- mfn = pte_to_mfn(page);
- tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+ if ( !(tab[offset] & _PAGE_PSE) )
+ {
+ page = tab[offset];
+ mfn = pte_to_mfn(page);
+ tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
- offset = l1_table_offset(start_address);
+ offset = l1_table_offset(start_address);
+ }
if ( start_address != (unsigned long)&shared_info )
{
+#ifdef CONFIG_PARAVIRT
mmu_updates[count].ptr =
((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
mmu_updates[count].val = tab[offset] & ~_PAGE_RW;
count++;
+#else
+ tab[offset] &= ~_PAGE_RW;
+#endif
}
else
printk("skipped %lx\n", start_address);
- start_address += PAGE_SIZE;
+ start_address += page_size;
+#ifdef CONFIG_PARAVIRT
if ( count == L1_PAGETABLE_ENTRIES ||
- start_address + PAGE_SIZE > end_address )
+ start_address + page_size > end_address )
{
rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF);
if ( rc < 0 )
@@ -356,8 +376,13 @@ static void set_readonly(void *text, void *etext)
}
count = 0;
}
+#else
+ if ( start_address == (1UL << L2_PAGETABLE_SHIFT) )
+ page_size = 1UL << L2_PAGETABLE_SHIFT;
+#endif
}
+#ifdef CONFIG_PARAVIRT
{
mmuext_op_t op = {
.cmd = MMUEXT_TLB_FLUSH_ALL,
@@ -365,6 +390,9 @@ static void set_readonly(void *text, void *etext)
int count;
HYPERVISOR_mmuext_op(&op, 1, &count, DOMID_SELF);
}
+#else
+ write_cr3((unsigned long)pt_base);
+#endif
}
/*
@@ -394,6 +422,8 @@ static pgentry_t *get_pgt(unsigned long va)
offset = l2_table_offset(va);
if ( !(tab[offset] & _PAGE_PRESENT) )
return NULL;
+ if ( tab[offset] & _PAGE_PSE )
+ return &tab[offset];
mfn = pte_to_mfn(tab[offset]);
tab = mfn_to_virt(mfn);
offset = l1_table_offset(va);
@@ -448,6 +478,9 @@ pgentry_t *need_pgt(unsigned long va)
new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
}
ASSERT(tab[offset] & _PAGE_PRESENT);
+ if ( tab[offset] & _PAGE_PSE )
+ return &tab[offset];
+
pt_mfn = pte_to_mfn(tab[offset]);
tab = mfn_to_virt(pt_mfn);
@@ -524,8 +557,6 @@ int do_map_frames(unsigned long va,
{
pgentry_t *pgt = NULL;
unsigned long done = 0;
- unsigned long i;
- int rc;
if ( !mfns )
{
@@ -539,6 +570,9 @@ int do_map_frames(unsigned long va,
memset(err, 0x00, n * sizeof(int));
while ( done < n )
{
+#ifdef CONFIG_PARAVIRT
+ unsigned long i;
+ int rc;
unsigned long todo;
if ( err )
@@ -578,6 +612,17 @@ int do_map_frames(unsigned long va,
}
}
done += todo;
+#else
+    /* HVMlite: write the PTEs directly; fetch a new L1 table whenever va
+       crosses into the next 2MB region (or on the first iteration). */
+    if ( !pgt || !(va & L1_MASK) )
+        pgt = need_pgt(va & ~L1_MASK);
+    if ( !pgt )
+        return -ENOMEM;
+
+    ASSERT(!(*pgt & _PAGE_PSE));
+    pgt[l1_table_offset(va)] = (pgentry_t)
+        (((mfns[done * stride] + done * incr) << PAGE_SHIFT) | prot);
+    done++;
+    /* Advance to the next page: without this the loop rewrites the same
+       PTE n times instead of mapping n consecutive pages. */
+    va += PAGE_SIZE;
+#endif
}
return 0;
@@ -609,16 +654,21 @@ void *map_frames_ex(const unsigned long *mfns, unsigned long n,
#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
int unmap_frames(unsigned long va, unsigned long num_frames)
{
+#ifdef CONFIG_PARAVIRT
int n = UNMAP_BATCH;
multicall_entry_t call[n];
int ret;
int i;
+#else
+ pgentry_t *pgt;
+#endif
ASSERT(!((unsigned long)va & ~PAGE_MASK));
DEBUG("va=%p, num=0x%lx\n", va, num_frames);
while ( num_frames ) {
+#ifdef CONFIG_PARAVIRT
if ( n > num_frames )
n = num_frames;
@@ -653,6 +703,17 @@ int unmap_frames(unsigned long va, unsigned long num_frames)
}
}
num_frames -= n;
+#else
+ pgt = get_pgt(va);
+ if ( pgt )
+ {
+ ASSERT(!(*pgt & _PAGE_PSE));
+ *pgt = 0;
+ invlpg(va);
+ }
+ va += PAGE_SIZE;
+ num_frames--;
+#endif
}
return 0;
}
@@ -662,14 +723,24 @@ int unmap_frames(unsigned long va, unsigned long num_frames)
*/
static void clear_bootstrap(void)
{
+#ifdef CONFIG_PARAVIRT
pte_t nullpte = { };
int rc;
+#else
+ pgentry_t *pgt;
+#endif
/* Use first page as the CoW zero page */
memset(&_text, 0, PAGE_SIZE);
mfn_zero = virt_to_mfn((unsigned long) &_text);
+#ifdef CONFIG_PARAVIRT
if ( (rc = HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG)) )
printk("Unable to unmap NULL page. rc=%d\n", rc);
+#else
+ pgt = get_pgt((unsigned long)&_text);
+ *pgt = 0;
+ invlpg((unsigned long)&_text);
+#endif
}
#ifdef CONFIG_PARAVIRT
@@ -121,7 +121,9 @@ void page_walk(unsigned long virt_address)
static int handle_cow(unsigned long addr) {
pgentry_t *tab = pt_base, page;
unsigned long new_page;
+#ifdef CONFIG_PARAVIRT
int rc;
+#endif
#if defined(__x86_64__)
page = tab[l4_table_offset(addr)];
@@ -137,6 +139,8 @@ static int handle_cow(unsigned long addr) {
page = tab[l2_table_offset(addr)];
if (!(page & _PAGE_PRESENT))
return 0;
+ if ( page & _PAGE_PSE )
+ return 0;
tab = pte_to_virt(page);
page = tab[l1_table_offset(addr)];
@@ -149,12 +153,18 @@ static int handle_cow(unsigned long addr) {
new_page = alloc_pages(0);
memset((void*) new_page, 0, PAGE_SIZE);
+#ifdef CONFIG_PARAVIRT
rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, __pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG);
if (!rc)
return 1;
printk("Map zero page to %lx failed: %d.\n", addr, rc);
return 0;
+#else
+ tab[l1_table_offset(addr)] = virt_to_mach(new_page) | L1_PROT;
+ invlpg(addr);
+ return 1;
+#endif
}
static void do_stack_walk(unsigned long frame_base)
@@ -78,6 +78,8 @@
#define L2_PAGETABLE_ENTRIES 512
#define L3_PAGETABLE_ENTRIES 4
+#define PAGETABLE_LEVELS 3
+
#define PADDR_BITS 44
#define PADDR_MASK ((1ULL << PADDR_BITS)-1)
@@ -110,6 +112,8 @@ typedef uint64_t pgentry_t;
#define L3_PAGETABLE_ENTRIES 512
#define L4_PAGETABLE_ENTRIES 512
+#define PAGETABLE_LEVELS 4
+
/* These are page-table limitations. Current CPUs support only 40-bit phys. */
#define PADDR_BITS 52
#define VADDR_BITS 48
@@ -206,6 +206,15 @@ static inline int irqs_disabled(void)
*/
typedef struct { volatile int counter; } atomic_t;
+/*
+ * Load a new top-level page table base into CR3. Writing CR3 also flushes
+ * the (non-global) TLB entries, hence the "memory" clobber barrier.
+ */
+static inline void write_cr3(unsigned long cr3)
+{
+    asm volatile( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+}
+
+/*
+ * Invalidate the TLB entry for the single page containing va. The "m"
+ * operand lets the compiler know the mapping at va is touched, preventing
+ * reordering of accesses around the flush.
+ */
+static inline void invlpg(unsigned long va)
+{
+    asm volatile ( "invlpg %0": : "m" (*(const char *)(va)) : "memory" );
+}
/************************** i386 *******************************/
#ifdef __INSIDE_MINIOS__