
[v3,08/21] mm/vmemmap: Initialize page table lock for vmemmap

Message ID: 20201108141113.65450-9-songmuchun@bytedance.com
State: New, archived
Series: Free some vmemmap pages of hugetlb page

Commit Message

Muchun Song Nov. 8, 2020, 2:11 p.m. UTC
In register_page_bootmem_memmap(), the slab allocator is not ready
yet. So when ALLOC_SPLIT_PTLOCKS is enabled, we use init_mm.page_table_lock;
otherwise we use the per page table lock (page->ptl). In a later patch,
we will use the vmemmap page table lock to guard the splitting of
the vmemmap huge PMD.
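
For illustration only, a rough sketch of how a later patch in this series
might take this lock around splitting a vmemmap huge PMD; the split helper
name split_vmemmap_huge_pmd() is a placeholder, not code from this series:

	spinlock_t *ptl = vmemmap_pmd_lock(pmd);

	/* Placeholder name: the real split helper comes in a later patch. */
	if (pmd_leaf(*pmd))
		split_vmemmap_huge_pmd(pmd, addr);

	spin_unlock(ptl);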

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 arch/x86/mm/init_64.c |  2 ++
 include/linux/mm.h    | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Oscar Salvador Nov. 9, 2020, 6:11 p.m. UTC | #1
On Sun, Nov 08, 2020 at 10:11:00PM +0800, Muchun Song wrote:
> In register_page_bootmem_memmap(), the slab allocator is not ready
> yet. So when ALLOC_SPLIT_PTLOCKS is enabled, we use init_mm.page_table_lock;
> otherwise we use the per page table lock (page->ptl). In a later patch,
> we will use the vmemmap page table lock to guard the splitting of
> the vmemmap huge PMD.

I am not sure about this one.
Grabbing init_mm's page table lock for specific hugetlb operations does not
seem like a good idea, and we do not know how contended that one is.

I think a better fit would be to find another hook to initialize the
page_table_lock at a later stage.
Anyway, we do not need it until we are going to perform an operation
on the range, right?

Unless I am missing something, this should be doable in hugetlb_init.

hugetlb_init is part of an initcall that gets called during do_initcalls.
At this time, slab is fully operative.

start_kernel
 kmem_cache_init_late
 ...
 arch_call_rest_init
  rest_init
   kernel_init_freeable
    do_basic_setup
     do_initcalls
      hugetlb_init
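
For reference, hugetlb_init() is registered as an initcall in mm/hugetlb.c,
roughly as shown below, which is why it runs from do_initcalls() with slab
fully up (the body is elided here):

	static int __init hugetlb_init(void)
	{
		/* ... sets up hstates, default pools, sysfs, etc. ... */
		return 0;
	}
	subsys_initcall(hugetlb_init);
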
Muchun Song Nov. 10, 2020, 5:17 a.m. UTC | #2
On Tue, Nov 10, 2020 at 2:11 AM Oscar Salvador <osalvador@suse.de> wrote:
>
> On Sun, Nov 08, 2020 at 10:11:00PM +0800, Muchun Song wrote:
> > In register_page_bootmem_memmap(), the slab allocator is not ready
> > yet. So when ALLOC_SPLIT_PTLOCKS is enabled, we use init_mm.page_table_lock;
> > otherwise we use the per page table lock (page->ptl). In a later patch,
> > we will use the vmemmap page table lock to guard the splitting of
> > the vmemmap huge PMD.
>
> I am not sure about this one.
> Grabbing init_mm's page table lock for specific hugetlb operations does not
> seem like a good idea, and we do not know how contended that one is.

These APIs are used to guard operations on the vmemmap page tables.
For now, that is only specific hugetlb operations, but in the future
someone else who wants to modify the vmemmap page tables could also
use these APIs. Yeah, we do not know how contended init_mm's page table
lock is, so grabbing it may not be a good idea.

>
> I think a better fit would be to find another hook to initialize the
> page_table_lock at a later stage.
> Anyway, we do not need it until we are going to perform an operation
> on the range, right?

Yeah. You are right.

>
> Unless I am missing something, this should be doable in hugetlb_init.
>
> hugetlb_init is part of an initcall that gets called during do_initcalls.
> At this time, slab is fully operative.

If we initialize the page_table_lock in hugetlb_init, we need to
walk the vmemmap page tables again. But the vmemmap is small, so the
overhead of doing that should also be small, and doing it in
hugetlb_init can make the code cleaner. Thanks very much.
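
A minimal sketch of that alternative, assuming a hypothetical helper called
from hugetlb_init() that re-walks the already populated vmemmap range and
initializes the PMD-level ptl on each page table page; the name and structure
are illustrative only, not code from this series:

	static void __init vmemmap_ptlock_init_late(unsigned long start,
						    unsigned long end)
	{
		unsigned long addr;

		for (addr = start; addr < end; addr = pud_addr_end(addr, end)) {
			pgd_t *pgd = pgd_offset_k(addr);
			p4d_t *p4d;
			pud_t *pud;

			if (pgd_none(*pgd))
				continue;
			p4d = p4d_offset(pgd, addr);
			if (p4d_none(*p4d))
				continue;
			pud = pud_offset(p4d, addr);
			if (pud_none(*pud) || pud_leaf(*pud))
				continue;

			/*
			 * pud_page(*pud) is the page backing the PMD table.
			 * Slab is up by now, so ptlock_init() can allocate
			 * the lock when ALLOC_SPLIT_PTLOCKS.
			 */
			ptlock_init(pud_page(*pud));
		}
	}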


>
> start_kernel
>  kmem_cache_init_late
>  ...
>  arch_call_rest_init
>   rest_init
>    kernel_init_freeable
>     do_basic_setup
>      do_initcalls
>       hugetlb_init
>
> --
> Oscar Salvador
> SUSE L3



--
Yours,
Muchun

Patch

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0435bee2e172..a010101bbe24 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1610,6 +1610,8 @@  void register_page_bootmem_memmap(unsigned long section_nr,
 		}
 		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
 
+		vmemmap_ptlock_init(pud_page(*pud));
+
 		if (!boot_cpu_has(X86_FEATURE_PSE)) {
 			next = (addr + PAGE_SIZE) & PAGE_MASK;
 			pmd = pmd_offset(pud, addr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a12354e63e49..ce429614d1ab 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2169,6 +2169,26 @@  static inline bool ptlock_init(struct page *page)
 	return true;
 }
 
+#if ALLOC_SPLIT_PTLOCKS
+static inline void vmemmap_ptlock_init(struct page *page)
+{
+}
+#else
+static inline void vmemmap_ptlock_init(struct page *page)
+{
+	/*
+	 * prep_new_page() initialize page->private (and therefore page->ptl)
+	 * with 0. Make sure nobody took it in use in between.
+	 *
+	 * It can happen if arch try to use slab for page table allocation:
+	 * slab code uses page->{s_mem, counters}, which share storage with
+	 * page->ptl.
+	 */
+	VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
+	spin_lock_init(ptlock_ptr(page));
+}
+#endif
+
 #else	/* !USE_SPLIT_PTE_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
@@ -2180,6 +2200,7 @@  static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void ptlock_free(struct page *page) {}
+static inline void vmemmap_ptlock_init(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
 static inline void pgtable_init(void)
@@ -2244,6 +2265,18 @@  static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 	return ptlock_ptr(pmd_to_page(pmd));
 }
 
+#if ALLOC_SPLIT_PTLOCKS
+static inline spinlock_t *vmemmap_pmd_lockptr(pmd_t *pmd)
+{
+	return &init_mm.page_table_lock;
+}
+#else
+static inline spinlock_t *vmemmap_pmd_lockptr(pmd_t *pmd)
+{
+	return ptlock_ptr(pmd_to_page(pmd));
+}
+#endif
+
 static inline bool pmd_ptlock_init(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2269,6 +2302,11 @@  static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 	return &mm->page_table_lock;
 }
 
+static inline spinlock_t *vmemmap_pmd_lockptr(pmd_t *pmd)
+{
+	return &init_mm.page_table_lock;
+}
+
 static inline bool pmd_ptlock_init(struct page *page) { return true; }
 static inline void pmd_ptlock_free(struct page *page) {}
 
@@ -2283,6 +2321,13 @@  static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
+static inline spinlock_t *vmemmap_pmd_lock(pmd_t *pmd)
+{
+	spinlock_t *ptl = vmemmap_pmd_lockptr(pmd);
+	spin_lock(ptl);
+	return ptl;
+}
+
 static inline bool pgtable_pmd_page_ctor(struct page *page)
 {
 	if (!pmd_ptlock_init(page))