@@ -6280,6 +6280,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
+ hugetlb_walker_lock();
/*
* Some archs (sparc64, sh*) have multiple pte_ts to
* each hugepage. We have to make sure we get the
@@ -6304,6 +6305,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
!hugetlbfs_pagecache_present(h, vma, vaddr)) {
if (pte)
spin_unlock(ptl);
+ hugetlb_walker_unlock();
remainder = 0;
break;
}
@@ -6325,6 +6327,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte)
spin_unlock(ptl);
+ hugetlb_walker_unlock();
+
if (flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
else if (unshare)
@@ -6367,6 +6371,15 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
}
+ /*
+ * Reaching here means the pteval is not absent, so anyone who
+ * wants to free and invalidate the pgtable page (i.e. the pte_t *)
+ * must first unmap the entries, which requires the pgtable lock.
+ * Since we are holding the pgtable lock, we remain safe even
+ * after dropping the walker lock.
+ */
+ hugetlb_walker_unlock();
+
pfn_offset = (vaddr & ~huge_page_mask(h)) >> PAGE_SHIFT;
page = pte_page(huge_ptep_get(pte));
The hugetlb walker lock makes sure the pte_t * won't go away from under us. A small trick is used to release the walker lock slightly earlier once a present pte has been found. Signed-off-by: Peter Xu <peterx@redhat.com> --- mm/hugetlb.c | 13 +++++++++++++ 1 file changed, 13 insertions(+)