diff mbox series

[v2,18/26] khugepaged: skip collapse if uffd-wp detected

Message ID 20190212025632.28946-19-peterx@redhat.com (mailing list archive)
State New, archived
Headers show
Series userfaultfd: write protection support | expand

Commit Message

Peter Xu Feb. 12, 2019, 2:56 a.m. UTC
Don't collapse into a huge PMD if any of the small PTEs are userfault
write protected.  The problem is that the write protection is at small
page granularity and there's no way to keep all of this write protection
information if the small pages are going to be merged into a huge PMD.

The same thing needs to be considered for swap entries and migration
entries.  So do the check for them as well, regardless of
khugepaged_max_ptes_swap.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/trace/events/huge_memory.h |  1 +
 mm/khugepaged.c                    | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)

Comments

Jerome Glisse Feb. 21, 2019, 6:17 p.m. UTC | #1
On Tue, Feb 12, 2019 at 10:56:24AM +0800, Peter Xu wrote:
> Don't collapse the huge PMD if there is any userfault write protected
> small PTEs.  The problem is that the write protection is in small page
> granularity and there's no way to keep all these write protection
> information if the small pages are going to be merged into a huge PMD.
> 
> The same thing needs to be considered for swap entries and migration
> entries.  So do the check as well disregarding khugepaged_max_ptes_swap.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Jérôme Glisse <jglisse@redhat.com>

> ---
>  include/trace/events/huge_memory.h |  1 +
>  mm/khugepaged.c                    | 23 +++++++++++++++++++++++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> index dd4db334bd63..2d7bad9cb976 100644
> --- a/include/trace/events/huge_memory.h
> +++ b/include/trace/events/huge_memory.h
> @@ -13,6 +13,7 @@
>  	EM( SCAN_PMD_NULL,		"pmd_null")			\
>  	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
>  	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
> +	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
>  	EM( SCAN_PAGE_RO,		"no_writable_page")		\
>  	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
>  	EM( SCAN_PAGE_NULL,		"page_null")			\
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4f017339ddb2..396c7e4da83e 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -29,6 +29,7 @@ enum scan_result {
>  	SCAN_PMD_NULL,
>  	SCAN_EXCEED_NONE_PTE,
>  	SCAN_PTE_NON_PRESENT,
> +	SCAN_PTE_UFFD_WP,
>  	SCAN_PAGE_RO,
>  	SCAN_LACK_REFERENCED_PAGE,
>  	SCAN_PAGE_NULL,
> @@ -1123,6 +1124,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  		pte_t pteval = *_pte;
>  		if (is_swap_pte(pteval)) {
>  			if (++unmapped <= khugepaged_max_ptes_swap) {
> +				/*
> +				 * Always be strict with uffd-wp
> +				 * enabled swap entries.  Please see
> +				 * comment below for pte_uffd_wp().
> +				 */
> +				if (pte_swp_uffd_wp(pteval)) {
> +					result = SCAN_PTE_UFFD_WP;
> +					goto out_unmap;
> +				}
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_SWAP_PTE;
> @@ -1142,6 +1152,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  			result = SCAN_PTE_NON_PRESENT;
>  			goto out_unmap;
>  		}
> +		if (pte_uffd_wp(pteval)) {
> +			/*
> +			 * Don't collapse the page if any of the small
> +			 * PTEs are armed with uffd write protection.
> +			 * Here we can also mark the new huge pmd as
> +			 * write protected if any of the small ones is
> +			 * marked but that could bring uknown
> +			 * userfault messages that falls outside of
> +			 * the registered range.  So, just be simple.
> +			 */
> +			result = SCAN_PTE_UFFD_WP;
> +			goto out_unmap;
> +		}
>  		if (pte_write(pteval))
>  			writable = true;
>  
> -- 
> 2.17.1
>
Mike Rapoport Feb. 25, 2019, 6:50 p.m. UTC | #2
On Tue, Feb 12, 2019 at 10:56:24AM +0800, Peter Xu wrote:
> Don't collapse the huge PMD if there is any userfault write protected
> small PTEs.  The problem is that the write protection is in small page
> granularity and there's no way to keep all these write protection
> information if the small pages are going to be merged into a huge PMD.
> 
> The same thing needs to be considered for swap entries and migration
> entries.  So do the check as well disregarding khugepaged_max_ptes_swap.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>

> ---
>  include/trace/events/huge_memory.h |  1 +
>  mm/khugepaged.c                    | 23 +++++++++++++++++++++++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
> index dd4db334bd63..2d7bad9cb976 100644
> --- a/include/trace/events/huge_memory.h
> +++ b/include/trace/events/huge_memory.h
> @@ -13,6 +13,7 @@
>  	EM( SCAN_PMD_NULL,		"pmd_null")			\
>  	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
>  	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
> +	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
>  	EM( SCAN_PAGE_RO,		"no_writable_page")		\
>  	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
>  	EM( SCAN_PAGE_NULL,		"page_null")			\
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 4f017339ddb2..396c7e4da83e 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -29,6 +29,7 @@ enum scan_result {
>  	SCAN_PMD_NULL,
>  	SCAN_EXCEED_NONE_PTE,
>  	SCAN_PTE_NON_PRESENT,
> +	SCAN_PTE_UFFD_WP,
>  	SCAN_PAGE_RO,
>  	SCAN_LACK_REFERENCED_PAGE,
>  	SCAN_PAGE_NULL,
> @@ -1123,6 +1124,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  		pte_t pteval = *_pte;
>  		if (is_swap_pte(pteval)) {
>  			if (++unmapped <= khugepaged_max_ptes_swap) {
> +				/*
> +				 * Always be strict with uffd-wp
> +				 * enabled swap entries.  Please see
> +				 * comment below for pte_uffd_wp().
> +				 */
> +				if (pte_swp_uffd_wp(pteval)) {
> +					result = SCAN_PTE_UFFD_WP;
> +					goto out_unmap;
> +				}
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_SWAP_PTE;
> @@ -1142,6 +1152,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
>  			result = SCAN_PTE_NON_PRESENT;
>  			goto out_unmap;
>  		}
> +		if (pte_uffd_wp(pteval)) {
> +			/*
> +			 * Don't collapse the page if any of the small
> +			 * PTEs are armed with uffd write protection.
> +			 * Here we can also mark the new huge pmd as
> +			 * write protected if any of the small ones is
> +			 * marked but that could bring uknown
> +			 * userfault messages that falls outside of
> +			 * the registered range.  So, just be simple.
> +			 */
> +			result = SCAN_PTE_UFFD_WP;
> +			goto out_unmap;
> +		}
>  		if (pte_write(pteval))
>  			writable = true;
> 
> -- 
> 2.17.1
>
diff mbox series

Patch

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index dd4db334bd63..2d7bad9cb976 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -13,6 +13,7 @@ 
 	EM( SCAN_PMD_NULL,		"pmd_null")			\
 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
+	EM( SCAN_PTE_UFFD_WP,		"pte_uffd_wp")			\
 	EM( SCAN_PAGE_RO,		"no_writable_page")		\
 	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
 	EM( SCAN_PAGE_NULL,		"page_null")			\
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4f017339ddb2..396c7e4da83e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -29,6 +29,7 @@  enum scan_result {
 	SCAN_PMD_NULL,
 	SCAN_EXCEED_NONE_PTE,
 	SCAN_PTE_NON_PRESENT,
+	SCAN_PTE_UFFD_WP,
 	SCAN_PAGE_RO,
 	SCAN_LACK_REFERENCED_PAGE,
 	SCAN_PAGE_NULL,
@@ -1123,6 +1124,15 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
 			if (++unmapped <= khugepaged_max_ptes_swap) {
+				/*
+				 * Always be strict with uffd-wp
+				 * enabled swap entries.  Please see
+				 * comment below for pte_uffd_wp().
+				 */
+				if (pte_swp_uffd_wp(pteval)) {
+					result = SCAN_PTE_UFFD_WP;
+					goto out_unmap;
+				}
 				continue;
 			} else {
 				result = SCAN_EXCEED_SWAP_PTE;
@@ -1142,6 +1152,19 @@  static int khugepaged_scan_pmd(struct mm_struct *mm,
 			result = SCAN_PTE_NON_PRESENT;
 			goto out_unmap;
 		}
+		if (pte_uffd_wp(pteval)) {
+			/*
+			 * Don't collapse the page if any of the small
+			 * PTEs are armed with uffd write protection.
+			 * Here we could also mark the new huge pmd as
+			 * write protected if any of the small ones is
+			 * marked but that could bring unknown
+			 * userfault messages that fall outside of
+			 * the registered range.  So, just be simple.
+			 */
+			result = SCAN_PTE_UFFD_WP;
+			goto out_unmap;
+		}
 		if (pte_write(pteval))
 			writable = true;