@@ -29,17 +29,21 @@
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm);
+static inline unsigned long pte_alloc_one_virt(struct mm_struct *mm);
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte);
+
#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pmd_t *)__get_free_page(PGALLOC_GFP);
+ return (pmd_t *)pte_alloc_one_virt(mm);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
- free_page((unsigned long)pmdp);
+ pte_free(mm, virt_to_page(pmdp));
}
static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
@@ -62,13 +66,13 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pud_t *)__get_free_page(PGALLOC_GFP);
+ return (pud_t *)pte_alloc_one_virt(mm);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
{
BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
- free_page((unsigned long)pudp);
+ pte_free(mm, virt_to_page(pudp));
}
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
@@ -93,7 +97,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
- return (pte_t *)__get_free_page(PGALLOC_GFP);
+ return (pte_t *)pte_alloc_one_virt(mm);
}
static inline pgtable_t
@@ -111,19 +115,41 @@ pte_alloc_one(struct mm_struct *mm)
return pte;
}
+static inline unsigned long
+pte_alloc_one_virt(struct mm_struct *mm)
+{
+ pgtable_t ptr;
+
+ ptr = pte_alloc_one(mm);
+ if (!ptr)
+ return 0;
+
+ return (unsigned long) page_to_virt(ptr);
+}
+
/*
* Free a PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
if (ptep)
- free_page((unsigned long)ptep);
+ pte_free(mm, virt_to_page(ptep));
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
- pgtable_page_dtor(pte);
- __free_page(pte);
+ /*
+ * Some page table pages might have come from memblock either
+ * with vmemmap_alloc_block (during vmemmap_populate) or with
+ * early_pgtable_alloc (during __create_pgd_mapping). These
+ * pages should be freed with free_reserved_page() instead.
+ */
+ if (PageReserved(pte)) {
+ free_reserved_page(pte);
+ } else {
+ pgtable_page_dtor(pte);
+ __free_page(pte);
+ }
}
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
@@ -31,7 +31,7 @@ static struct kmem_cache *pgd_cache __ro_after_init;
pgd_t *pgd_alloc(struct mm_struct *mm)
{
if (PGD_SIZE == PAGE_SIZE)
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)pte_alloc_one_virt(mm);
else
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}
@@ -39,7 +39,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
if (PGD_SIZE == PAGE_SIZE)
- free_page((unsigned long)pgd);
+ pte_free(mm, virt_to_page(pgd));
else
kmem_cache_free(pgd_cache, pgd);
}
Currently there are inconsistencies in how page table pages are allocated, initialized and destroyed. Not all of those pages go through the standard constructor/destructor pair pgtable_page_ctor()/pgtable_page_dtor(), which is essential for proper initialization and accounting. Not going through a proper pgtable_page_ctor()/pgtable_page_dtor() cycle makes them vulnerable to bad page state errors during the page table tear down process. This patch makes page table pages at all levels, whether for user space or for kernel mappings, go through these standard constructs. While here, pte_free() has been modified to accommodate pages which might not have been allocated through the buddy system but instead came directly from memblock [1] during the early boot process. Those pages must not go through pgtable_page_dtor() and are instead freed with free_reserved_page(). A PGD based page table page which is allocated from the 'pgd_cache' kmem cache in certain situations (PGD_SIZE != PAGE_SIZE) is excluded from being cycled through pgtable_page_[ctor/dtor]. This is not a problem because this slab never gets freed back to the buddy allocator. [1] Page table page allocation from memblock (a) early_pgtable_alloc (passed with __create_pgd_mapping) (b) vmemmap_pgd|pud_populate (vmemmap_populate) Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> --- arch/arm64/include/asm/pgalloc.h | 42 ++++++++++++++++++++++++++------ arch/arm64/mm/pgd.c | 4 +-- 2 files changed, 36 insertions(+), 10 deletions(-)