
[v2,1/7] mm/thp: fix __split_huge_pmd_locked() for migration PMD

Message ID 20200902165830.5367-2-rcampbell@nvidia.com
State New
Series mm/hmm/nouveau: add THP migration to migrate_vma_*

Commit Message

Ralph Campbell Sept. 2, 2020, 4:58 p.m. UTC
A migrating transparent huge page has to already be unmapped. Otherwise,
the page could be modified while it is being copied to a new page and
data could be lost. The function __split_huge_pmd() checks for a PMD
migration entry before calling __split_huge_pmd_locked() leading one to
think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
However, the code always increments the page->_mapcount and adjusts the
memory control group accounting assuming the page is mapped.
Also, if the PMD entry is a migration PMD entry, the call to
is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
Fix these problems by checking for a PMD migration entry.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
---
 mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)
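
A PMD migration entry is a swap-style entry, not a present mapping, so the pfn behind it must be recovered through the swap-entry helpers rather than pmd_pfn(). A minimal sketch of the distinction (pmd_any_pfn() is a hypothetical helper for illustration; the other names follow the kernel of this era, but this is a paraphrase, not the exact source):

/*
 * Hypothetical helper: recover the pfn behind a PMD that is either a
 * present transparent huge page or a PMD migration entry. A sketch
 * using ~v5.9 helper names, not the exact kernel source.
 */
static unsigned long pmd_any_pfn(pmd_t pmd)
{
	if (is_pmd_migration_entry(pmd))
		/* Swap-style entry: pmd_pfn() would decode the wrong bits. */
		return migration_entry_to_pfn(pmd_to_swp_entry(pmd));
	return pmd_pfn(pmd);	/* Present huge PMD: the pfn field is valid. */
}

This is why the patch gates is_huge_zero_pmd() behind pmd_trans_huge(): the zero-page test reads pmd_pfn(), which is only meaningful for a present huge PMD.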

Comments

Yang Shi Sept. 2, 2020, 8:29 p.m. UTC | #1
On Wed, Sep 2, 2020 at 9:58 AM Ralph Campbell <rcampbell@nvidia.com> wrote:
>
> A migrating transparent huge page has to already be unmapped. Otherwise,
> the page could be modified while it is being copied to a new page and
> data could be lost. The function __split_huge_pmd() checks for a PMD
> migration entry before calling __split_huge_pmd_locked() leading one to
> think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
> However, the code always increments the page->_mapcount and adjusts the
> memory control group accounting assuming the page is mapped.
> Also, if the PMD entry is a migration PMD entry, the call to
> is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
> of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
> Fix these problems by checking for a PMD migration entry.

Thanks for catching this. The fix looks good to me.

Reviewed-by: Yang Shi <shy828301@gmail.com>

I think this fix can go in separately from the rest of the series.
>
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
> ---
>  mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
>  1 file changed, 23 insertions(+), 19 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 2a468a4acb0a..606d712d9505 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2023,7 +2023,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>                 put_page(page);
>                 add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
>                 return;
> -       } else if (is_huge_zero_pmd(*pmd)) {
> +       } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
>                 /*
>                  * FIXME: Do we want to invalidate secondary mmu by calling
>                  * mmu_notifier_invalidate_range() see comments below inside
> @@ -2117,30 +2117,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>                 pte = pte_offset_map(&_pmd, addr);
>                 BUG_ON(!pte_none(*pte));
>                 set_pte_at(mm, addr, pte, entry);
> -               atomic_inc(&page[i]._mapcount);
> -               pte_unmap(pte);
> -       }
> -
> -       /*
> -        * Set PG_double_map before dropping compound_mapcount to avoid
> -        * false-negative page_mapped().
> -        */
> -       if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
> -               for (i = 0; i < HPAGE_PMD_NR; i++)
> +               if (!pmd_migration)
>                         atomic_inc(&page[i]._mapcount);
> +               pte_unmap(pte);
>         }
>
> -       lock_page_memcg(page);
> -       if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> -               /* Last compound_mapcount is gone. */
> -               __dec_lruvec_page_state(page, NR_ANON_THPS);
> -               if (TestClearPageDoubleMap(page)) {
> -                       /* No need in mapcount reference anymore */
> +       if (!pmd_migration) {
> +               /*
> +                * Set PG_double_map before dropping compound_mapcount to avoid
> +                * false-negative page_mapped().
> +                */
> +               if (compound_mapcount(page) > 1 &&
> +                   !TestSetPageDoubleMap(page)) {
>                         for (i = 0; i < HPAGE_PMD_NR; i++)
> -                               atomic_dec(&page[i]._mapcount);
> +                               atomic_inc(&page[i]._mapcount);
> +               }
> +
> +               lock_page_memcg(page);
> +               if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> +                       /* Last compound_mapcount is gone. */
> +                       __dec_lruvec_page_state(page, NR_ANON_THPS);
> +                       if (TestClearPageDoubleMap(page)) {
> +                               /* No need in mapcount reference anymore */
> +                               for (i = 0; i < HPAGE_PMD_NR; i++)
> +                                       atomic_dec(&page[i]._mapcount);
> +                       }
>                 }
> +               unlock_page_memcg(page);
>         }
> -       unlock_page_memcg(page);
>
>         smp_wmb(); /* make pte visible before pmd */
>         pmd_populate(mm, pmd, pgtable);
> --
> 2.20.1
>
>
Zi Yan Sept. 2, 2020, 9:47 p.m. UTC | #2
On 2 Sep 2020, at 12:58, Ralph Campbell wrote:

> A migrating transparent huge page has to already be unmapped. Otherwise,
> the page could be modified while it is being copied to a new page and
> data could be lost. The function __split_huge_pmd() checks for a PMD
> migration entry before calling __split_huge_pmd_locked() leading one to
> think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
> However, the code always increments the page->_mapcount and adjusts the
> memory control group accounting assuming the page is mapped.
> Also, if the PMD entry is a migration PMD entry, the call to
> is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
> of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
> Fix these problems by checking for a PMD migration entry.
>
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>

Thanks for the fix. You can add Reviewed-by: Zi Yan <ziy@nvidia.com>

I think you also want to add the Fixes tag and cc stable.

Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Cc: stable@vger.kernel.org # 4.14+
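
(For reference, the kernel's submitting-patches documentation describes generating such a tag with a git pretty-format alias; treat the exact snippet below as an approximation of what that documentation recommends:

[core]
	abbrev = 12
[pretty]
	fixes = Fixes: %h (\"%s\")

$ git log -1 --pretty=fixes 84c3fc4e9c56

This prints the tag with a 12-character abbreviated hash and the quoted subject line.)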

> ---
>  mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
>  1 file changed, 23 insertions(+), 19 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 2a468a4acb0a..606d712d9505 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2023,7 +2023,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		put_page(page);
>  		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
>  		return;
> -	} else if (is_huge_zero_pmd(*pmd)) {
> +	} else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
>  		/*
>  		 * FIXME: Do we want to invalidate secondary mmu by calling
>  		 * mmu_notifier_invalidate_range() see comments below inside
> @@ -2117,30 +2117,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		pte = pte_offset_map(&_pmd, addr);
>  		BUG_ON(!pte_none(*pte));
>  		set_pte_at(mm, addr, pte, entry);
> -		atomic_inc(&page[i]._mapcount);
> -		pte_unmap(pte);
> -	}
> -
> -	/*
> -	 * Set PG_double_map before dropping compound_mapcount to avoid
> -	 * false-negative page_mapped().
> -	 */
> -	if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
> -		for (i = 0; i < HPAGE_PMD_NR; i++)
> +		if (!pmd_migration)
>  			atomic_inc(&page[i]._mapcount);
> +		pte_unmap(pte);
>  	}
>
> -	lock_page_memcg(page);
> -	if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> -		/* Last compound_mapcount is gone. */
> -		__dec_lruvec_page_state(page, NR_ANON_THPS);
> -		if (TestClearPageDoubleMap(page)) {
> -			/* No need in mapcount reference anymore */
> +	if (!pmd_migration) {
> +		/*
> +		 * Set PG_double_map before dropping compound_mapcount to avoid
> +		 * false-negative page_mapped().
> +		 */
> +		if (compound_mapcount(page) > 1 &&
> +		    !TestSetPageDoubleMap(page)) {
>  			for (i = 0; i < HPAGE_PMD_NR; i++)
> -				atomic_dec(&page[i]._mapcount);
> +				atomic_inc(&page[i]._mapcount);
> +		}
> +
> +		lock_page_memcg(page);
> +		if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> +			/* Last compound_mapcount is gone. */
> +			__dec_lruvec_page_state(page, NR_ANON_THPS);
> +			if (TestClearPageDoubleMap(page)) {
> +				/* No need in mapcount reference anymore */
> +				for (i = 0; i < HPAGE_PMD_NR; i++)
> +					atomic_dec(&page[i]._mapcount);
> +			}
>  		}
> +		unlock_page_memcg(page);
>  	}
> -	unlock_page_memcg(page);
>
>  	smp_wmb(); /* make pte visible before pmd */
>  	pmd_populate(mm, pmd, pgtable);
> -- 
> 2.20.1


—
Best Regards,
Yan Zi
Ralph Campbell Sept. 2, 2020, 10:02 p.m. UTC | #3
On 9/2/20 2:47 PM, Zi Yan wrote:
> On 2 Sep 2020, at 12:58, Ralph Campbell wrote:
> 
>> A migrating transparent huge page has to already be unmapped. Otherwise,
>> the page could be modified while it is being copied to a new page and
>> data could be lost. The function __split_huge_pmd() checks for a PMD
>> migration entry before calling __split_huge_pmd_locked() leading one to
>> think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
>> However, the code always increments the page->_mapcount and adjusts the
>> memory control group accounting assuming the page is mapped.
>> Also, if the PMD entry is a migration PMD entry, the call to
>> is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
>> of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
>> Fix these problems by checking for a PMD migration entry.
>>
>> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
> 
> Thanks for the fix. You can add Reviewed-by: Zi Yan <ziy@nvidia.com>
> 
> I think you also want to add the Fixes tag and cc stable.
> 
> Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
> Cc: stable@vger.kernel.org # 4.14+

Thanks, I'll add these.
Kirill A. Shutemov Sept. 3, 2020, 12:54 p.m. UTC | #4
On Wed, Sep 02, 2020 at 09:58:24AM -0700, Ralph Campbell wrote:
> A migrating transparent huge page has to already be unmapped. Otherwise,
> the page could be modified while it is being copied to a new page and
> data could be lost. The function __split_huge_pmd() checks for a PMD
> migration entry before calling __split_huge_pmd_locked() leading one to
> think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
> However, the code always increments the page->_mapcount and adjusts the
> memory control group accounting assuming the page is mapped.
> Also, if the PMD entry is a migration PMD entry, the call to
> is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
> of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
> Fix these problems by checking for a PMD migration entry.
> 
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>

Hm. Could you remind me which code path splits a migration PMD? Maybe it
should wait until migration is complete? We could avoid a lot of
complexity this way.
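
For context on the question: the commit message's premise is that __split_huge_pmd() itself routes migration entries into the locked helper. Condensed, the caller looked roughly like this at the time (a paraphrase with the page re-check and mmu-notifier bookkeeping trimmed; not the exact source):

void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
		      unsigned long address, bool freeze, struct page *page)
{
	spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);

	/*
	 * A present huge PMD, a devmap PMD, or a PMD migration entry all
	 * reach the locked helper, so __split_huge_pmd_locked() has to
	 * cope with the migration case.
	 */
	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
	    is_pmd_migration_entry(*pmd))
		__split_huge_pmd_locked(vma, pmd, address & HPAGE_PMD_MASK,
					freeze);
	spin_unlock(ptl);
}

Whether splitting should instead wait for migration to finish, as suggested above, is the open design question here.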

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a468a4acb0a..606d712d9505 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2023,7 +2023,7 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		put_page(page);
 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
 		return;
-	} else if (is_huge_zero_pmd(*pmd)) {
+	} else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
 		/*
 		 * FIXME: Do we want to invalidate secondary mmu by calling
 		 * mmu_notifier_invalidate_range() see comments below inside
@@ -2117,30 +2117,34 @@  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
-		atomic_inc(&page[i]._mapcount);
-		pte_unmap(pte);
-	}
-
-	/*
-	 * Set PG_double_map before dropping compound_mapcount to avoid
-	 * false-negative page_mapped().
-	 */
-	if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
-		for (i = 0; i < HPAGE_PMD_NR; i++)
+		if (!pmd_migration)
 			atomic_inc(&page[i]._mapcount);
+		pte_unmap(pte);
 	}
 
-	lock_page_memcg(page);
-	if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
-		/* Last compound_mapcount is gone. */
-		__dec_lruvec_page_state(page, NR_ANON_THPS);
-		if (TestClearPageDoubleMap(page)) {
-			/* No need in mapcount reference anymore */
+	if (!pmd_migration) {
+		/*
+		 * Set PG_double_map before dropping compound_mapcount to avoid
+		 * false-negative page_mapped().
+		 */
+		if (compound_mapcount(page) > 1 &&
+		    !TestSetPageDoubleMap(page)) {
 			for (i = 0; i < HPAGE_PMD_NR; i++)
-				atomic_dec(&page[i]._mapcount);
+				atomic_inc(&page[i]._mapcount);
+		}
+
+		lock_page_memcg(page);
+		if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+			/* Last compound_mapcount is gone. */
+			__dec_lruvec_page_state(page, NR_ANON_THPS);
+			if (TestClearPageDoubleMap(page)) {
+				/* No need in mapcount reference anymore */
+				for (i = 0; i < HPAGE_PMD_NR; i++)
+					atomic_dec(&page[i]._mapcount);
+			}
 		}
+		unlock_page_memcg(page);
 	}
-	unlock_page_memcg(page);
 
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
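
To make the second hunk easier to follow: in the pmd_migration case, entry is built from a freshly encoded PTE-level migration entry, so no subpage is actually mapped and the _mapcount increment must be skipped. A condensed paraphrase of the per-PTE loop after this patch (write, haddr, _pmd, and pmd_migration are computed earlier in the function; helper names follow the ~v5.9 kernel; soft-dirty and uffd-wp handling omitted; not the exact source):

for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
	pte_t entry, *pte;

	if (pmd_migration) {
		/* Re-encode the huge migration entry per subpage. */
		swp_entry_t swp_entry = make_migration_entry(page + i, write);

		entry = swp_entry_to_pte(swp_entry);
	} else {
		entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
		if (write)
			entry = maybe_mkwrite(entry, vma);
	}
	pte = pte_offset_map(&_pmd, addr);
	BUG_ON(!pte_none(*pte));
	set_pte_at(mm, addr, pte, entry);
	if (!pmd_migration)
		/* Only a present PTE actually maps the subpage. */
		atomic_inc(&page[i]._mapcount);
	pte_unmap(pte);
}

The same reasoning drives the second half of the patch: the PG_double_map and memcg bookkeeping only makes sense when the subpages are mapped, so the whole block is skipped for the migration case.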