@@ -570,28 +570,33 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
}
#endif
-static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int smaps_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
pte_t *pte;
spinlock_t *ptl;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
- smaps_pmd_entry(pmd, addr, walk);
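+		/*
+		 * The walker read the pmd without the ptl; if the entry
+		 * changed underneath us (e.g. a THP split or collapse),
+		 * retry the range rather than act on a stale value.
+		 */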
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
+ smaps_pmd_entry(pmdp, addr, walk);
spin_unlock(ptl);
goto out;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
goto out;
/*
* The mmap_lock held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
* in here.
*/
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE)
smaps_pte_entry(pte, addr, walk);
pte_unmap_unlock(pte - 1, ptl);
@@ -1091,7 +1096,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
}
#endif
-static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+static int clear_refs_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct clear_refs_private *cp = walk->private;
@@ -1100,20 +1105,25 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
spinlock_t *ptl;
struct page *page;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
- clear_soft_dirty_pmd(vma, addr, pmd);
+ clear_soft_dirty_pmd(vma, addr, pmdp);
goto out;
}
- if (!pmd_present(*pmd))
+ if (!pmd_present(pmd))
goto out;
- page = pmd_page(*pmd);
+ page = pmd_page(pmd);
/* Clear accessed and referenced bits. */
- pmdp_test_and_clear_young(vma, addr, pmd);
+ pmdp_test_and_clear_young(vma, addr, pmdp);
test_and_clear_page_young(page);
ClearPageReferenced(page);
out:
@@ -1121,10 +1131,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
@@ -1388,8 +1398,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
return make_pme(frame, flags);
}
-static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int pagemap_pmd_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
struct pagemapread *pm = walk->private;
@@ -1401,9 +1411,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
u64 flags = 0, frame = 0;
- pmd_t pmd = *pmdp;
struct page *page = NULL;
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
+
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -1456,7 +1471,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
return err;
}
- if (pmd_trans_unstable(pmdp))
+ if (pmd_trans_unstable(&pmd))
return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1768,7 +1783,7 @@ static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
}
#endif
-static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
+static int gather_pte_stats(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct numa_maps *md = walk->private;
@@ -1778,22 +1793,28 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *pte;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
struct page *page;
- page = can_gather_numa_stats_pmd(*pmd, vma, addr);
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ page = can_gather_numa_stats_pmd(pmd, vma, addr);
if (page)
- gather_stats(page, md, pmd_dirty(*pmd),
+ gather_stats(page, md, pmd_dirty(pmd),
HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(ptl);
return 0;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
#endif
- orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
do {
struct page *page = can_gather_numa_stats(*pte, vma, addr);
if (!page)
@@ -41,7 +41,7 @@ struct mm_walk_ops {
unsigned long next, struct mm_walk *walk);
int (*pud_entry)(pud_t pud, pud_t *pudp, unsigned long addr,
unsigned long next, struct mm_walk *walk);
- int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
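+	/* pmd is a snapshot of *pmdp, read once without the page table lock */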
+ int (*pmd_entry)(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk);
@@ -183,14 +183,14 @@ static long madvise_behavior(struct vm_area_struct *vma,
}
#ifdef CONFIG_SWAP
-static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
+static int swapin_walk_pmd_entry(pmd_t pmd, pmd_t *pmdp, unsigned long start,
unsigned long end, struct mm_walk *walk)
{
pte_t *orig_pte;
struct vm_area_struct *vma = walk->private;
unsigned long index;
- if (pmd_none_or_trans_huge_or_clear_bad(pmd))
+ if (pmd_none_or_trans_huge_or_clear_bad(&pmd))
return 0;
for (index = start; index != end; index += PAGE_SIZE) {
@@ -199,7 +199,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
struct page *page;
spinlock_t *ptl;
- orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmdp, start, &ptl);
pte = *(orig_pte + ((index - start) / PAGE_SIZE));
pte_unmap_unlock(orig_pte, ptl);
@@ -304,7 +304,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
return 0;
}
-static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
+static int madvise_cold_or_pageout_pte_range(pmd_t pmd, pmd_t *pmdp,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -322,26 +322,29 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
return -EINTR;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge(*pmd)) {
- pmd_t orig_pmd;
+ if (pmd_trans_huge(pmd)) {
unsigned long next = pmd_addr_end(addr, end);
tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (!ptl)
return 0;
- orig_pmd = *pmd;
- if (is_huge_zero_pmd(orig_pmd))
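+		/* the entry changed while we waited for the ptl: retry */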
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ goto huge_unlock;
+ }
+
+ if (is_huge_zero_pmd(pmd))
goto huge_unlock;
- if (unlikely(!pmd_present(orig_pmd))) {
+ if (unlikely(!pmd_present(pmd))) {
VM_BUG_ON(thp_migration_supported() &&
- !is_pmd_migration_entry(orig_pmd));
+ !is_pmd_migration_entry(pmd));
goto huge_unlock;
}
- page = pmd_page(orig_pmd);
+ page = pmd_page(pmd);
/* Do not interfere with other mappings of this page */
if (page_mapcount(page) != 1)
@@ -361,12 +364,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
return 0;
}
- if (pmd_young(orig_pmd)) {
- pmdp_invalidate(vma, addr, pmd);
- orig_pmd = pmd_mkold(orig_pmd);
+ if (pmd_young(pmd)) {
+ pmdp_invalidate(vma, addr, pmdp);
+			pmd = pmd_mkold(pmd);
 
-			set_pmd_at(mm, addr, pmd, orig_pmd);
- tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ set_pmd_at(mm, addr, pmdp, pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmdp, addr);
}
ClearPageReferenced(page);
@@ -388,11 +391,11 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
}
regular_page:
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
#endif
tlb_change_page_size(tlb, PAGE_SIZE);
- orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
for (; addr < end; pte++, addr += PAGE_SIZE) {
@@ -424,12 +427,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (split_huge_page(page)) {
unlock_page(page);
put_page(page);
- pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte_offset_map_lock(mm, pmdp, addr, &ptl);
break;
}
unlock_page(page);
put_page(page);
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
pte--;
addr -= PAGE_SIZE;
continue;
@@ -566,7 +569,7 @@ static long madvise_pageout(struct vm_area_struct *vma,
return 0;
}
-static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
+static int madvise_free_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -580,15 +583,15 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long next;
next = pmd_addr_end(addr, end);
- if (pmd_trans_huge(*pmd))
- if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next))
+ if (pmd_trans_huge(pmd))
+ if (madvise_free_huge_pmd(tlb, vma, pmdp, addr, next))
goto next;
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
tlb_change_page_size(tlb, PAGE_SIZE);
- orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ orig_pte = pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -634,12 +637,12 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
if (split_huge_page(page)) {
unlock_page(page);
put_page(page);
- pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte_offset_map_lock(mm, pmdp, addr, &ptl);
goto out;
}
unlock_page(page);
put_page(page);
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
pte--;
addr -= PAGE_SIZE;
continue;
@@ -5827,7 +5827,7 @@ static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
}
#endif
-static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
+static int mem_cgroup_count_precharge_pte_range(pmd_t pmd, pmd_t *pmdp,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -5835,22 +5835,27 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
pte_t *pte;
spinlock_t *ptl;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
/*
		 * Note there cannot be MC_TARGET_DEVICE for now as we do not
* support transparent huge page with MEMORY_DEVICE_PRIVATE but
* this might change.
*/
- if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
+ if (get_mctgt_type_thp(vma, addr, pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
return 0;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE)
if (get_mctgt_type(vma, addr, *pte, NULL))
mc.precharge++; /* increment precharge temporarily */
@@ -6023,7 +6028,7 @@ static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
mem_cgroup_clear_mc();
}
-static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
+static int mem_cgroup_move_charge_pte_range(pmd_t pmd, pmd_t *pmdp,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
@@ -6035,13 +6040,18 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
union mc_target target;
struct page *page;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl);
return 0;
}
- target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
+ target_type = get_mctgt_type_thp(vma, addr, pmd, &target);
if (target_type == MC_TARGET_PAGE) {
page = target.page;
if (!isolate_lru_page(page)) {
@@ -6066,10 +6076,10 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
return 0;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
retry:
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(vma->vm_mm, pmdp, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {
pte_t ptent = *(pte++);
bool device = false;
@@ -516,7 +516,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
* -EIO - only MPOL_MF_STRICT was specified and an existing page was already
* on a node that does not follow the policy.
*/
-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+static int queue_pages_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
@@ -528,18 +528,23 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
pte_t *pte;
spinlock_t *ptl;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
- ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
+ if (memcmp(pmdp, &pmd, sizeof(pmd)) != 0) {
+ walk->action = ACTION_AGAIN;
+ spin_unlock(ptl);
+ return 0;
+ }
+ ret = queue_pages_pmd(pmdp, ptl, addr, end, walk);
if (ret != 2)
return ret;
}
/* THP was split, fall through to pte walk */
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(&pmd))
return 0;
- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
@@ -96,8 +96,8 @@ static int mincore_unmapped_range(unsigned long addr, unsigned long end,
return 0;
}
-static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+static int mincore_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
spinlock_t *ptl;
struct vm_area_struct *vma = walk->vma;
@@ -105,19 +105,19 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
unsigned char *vec = walk->private;
int nr = (end - addr) >> PAGE_SHIFT;
- ptl = pmd_trans_huge_lock(pmd, vma);
+ ptl = pmd_trans_huge_lock(pmdp, vma);
if (ptl) {
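+		/*
+		 * No stale-pmd check needed here: this branch never uses
+		 * the unlocked pmd value, it only marks the range resident.
+		 */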
memset(vec, 1, nr);
spin_unlock(ptl);
goto out;
}
- if (pmd_trans_unstable(pmd)) {
+ if (pmd_trans_unstable(&pmd)) {
__mincore_unmapped_range(addr, end, vma, vec);
goto out;
}
- ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ ptep = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr != end; ptep++, addr += PAGE_SIZE) {
pte_t pte = *ptep;
@@ -61,17 +61,19 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- pmd_t *pmd;
+ pmd_t *pmdp;
+ pmd_t pmd;
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
int depth = real_depth(3);
- pmd = pmd_offset(&pud, addr);
+ pmdp = pmd_offset(&pud, addr);
do {
again:
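+		/*
+		 * Read the pmd once; entry callbacks get this snapshot and
+		 * the pointer, and can set ACTION_AGAIN to come back here
+		 * if the entry changed under them.
+		 */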
+ pmd = READ_ONCE(*pmdp);
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
+ if (pmd_none(pmd) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole)
err = ops->pte_hole(addr, next, depth, walk);
if (err)
@@ -86,7 +88,7 @@ static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
* needs to know about pmd_trans_huge() pmds
*/
if (ops->pmd_entry)
- err = ops->pmd_entry(pmd, addr, next, walk);
+ err = ops->pmd_entry(pmd, pmdp, addr, next, walk);
if (err)
break;
@@ -97,21 +99,22 @@ static int walk_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
* Check this here so we only break down trans_huge
* pages when we _need_ to
*/
- if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
+ if ((!walk->vma && (pmd_leaf(pmd) || !pmd_present(pmd))) ||
walk->action == ACTION_CONTINUE ||
!(ops->pte_entry))
continue;
if (walk->vma) {
- split_huge_pmd(walk->vma, pmd, addr);
- if (pmd_trans_unstable(pmd))
+ split_huge_pmd(walk->vma, pmdp, addr);
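+			/* the split rewrote the entry: re-read it before the check */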
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_trans_unstable(&pmd))
goto again;
}
- err = walk_pte_range(pmd, addr, next, walk);
+ err = walk_pte_range(pmdp, addr, next, walk);
if (err)
break;
- } while (pmd++, addr = next, addr != end);
+ } while (pmdp++, addr = next, addr != end);
return err;
}