diff mbox series

[v2,3/6] mm: userfaultfd: support UFFDIO_POISON for hugetlbfs

Message ID 20230629205040.665834-3-axelrasmussen@google.com (mailing list archive)
State New
Headers show
Series [v2,1/6] mm: userfaultfd: add new UFFDIO_POISON ioctl | expand

Commit Message

Axel Rasmussen June 29, 2023, 8:50 p.m. UTC
The behavior here is the same as it is for anon/shmem. This is done
separately because hugetlb pte marker handling is a bit different.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
---
 mm/hugetlb.c     | 33 +++++++++++++++++++++++++++++++--
 mm/userfaultfd.c |  6 +-----
 2 files changed, 32 insertions(+), 7 deletions(-)

Comments

Peter Xu July 4, 2023, 8:59 p.m. UTC | #1
On Thu, Jun 29, 2023 at 01:50:37PM -0700, Axel Rasmussen wrote:
> The behavior here is the same as it is for anon/shmem. This is done
> separately because hugetlb pte marker handling is a bit different.
> 
> Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
> ---
>  mm/hugetlb.c     | 33 +++++++++++++++++++++++++++++++--
>  mm/userfaultfd.c |  6 +-----
>  2 files changed, 32 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 38711d49e4db..05abe88986b6 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6090,14 +6090,24 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  	}
>  
>  	entry = huge_ptep_get(ptep);
> -	/* PTE markers should be handled the same way as none pte */
> -	if (huge_pte_none_mostly(entry))
> +	if (huge_pte_none_mostly(entry)) {
> +		if (is_pte_marker(entry)) {
> +			unsigned long marker = pte_marker_get(pte_to_swp_entry(entry));
> +
> +			if (marker & PTE_MARKER_UFFD_POISON) {
> +				ret = VM_FAULT_HWPOISON_LARGE;
> +				goto out_mutex;
> +			}
> +		}
>  		/*
> +		 * Other PTE markers should be handled the same way as none PTE.
> +		 *
>  		 * hugetlb_no_page will drop vma lock and hugetlb fault
>  		 * mutex internally, which make us return immediately.
>  		 */
>  		return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
>  				      entry, flags);
> +	}
>  
>  	ret = 0;
>  
> @@ -6253,6 +6263,25 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
>  	int writable;
>  	bool folio_in_pagecache = false;
>  
> +	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
> +		ptl = huge_pte_lock(h, dst_mm, dst_pte);
> +
> +		/* Don't overwrite any existing PTEs (even markers) */
> +		if (!huge_pte_none(huge_ptep_get(dst_pte))) {
> +			spin_unlock(ptl);
> +			return -EEXIST;
> +		}
> +
> +		_dst_pte = make_pte_marker(PTE_MARKER_UFFD_POISON);
> +		set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
> +
> +		/* No need to invalidate - it was non-present before */
> +		update_mmu_cache(dst_vma, dst_addr, dst_pte);
> +
> +		spin_unlock(ptl);
> +		return 0;
> +	}
> +
>  	if (is_continue) {
>  		ret = -EFAULT;
>  		folio = filemap_lock_folio(mapping, idx);
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 87b62ca1e09e..4436cae1c7a8 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -381,12 +381,8 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
>  	 * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
>  	 * by THP.  Since we can not reliably insert a zero page, this
>  	 * feature is not supported.
> -	 *
> -	 * PTE marker handling for hugetlb is a bit special, so for now
> -	 * UFFDIO_POISON is not supported.
>  	 */
> -	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE) ||
> -	    uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
> +	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
>  		mmap_read_unlock(dst_mm);
>  		return -EINVAL;

If we have the last patch declaring the feature bits and so on, IIUC we
don't need this change back and forth.  Other than that looks good.

Thanks,

>  	}
> -- 
> 2.41.0.255.g8b1d071c50-goog
>
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38711d49e4db..05abe88986b6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6090,14 +6090,24 @@  vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	entry = huge_ptep_get(ptep);
-	/* PTE markers should be handled the same way as none pte */
-	if (huge_pte_none_mostly(entry))
+	if (huge_pte_none_mostly(entry)) {
+		if (is_pte_marker(entry)) {
+			unsigned long marker = pte_marker_get(pte_to_swp_entry(entry));
+
+			if (marker & PTE_MARKER_UFFD_POISON) {
+				ret = VM_FAULT_HWPOISON_LARGE;
+				goto out_mutex;
+			}
+		}
 		/*
+		 * Other PTE markers should be handled the same way as none PTE.
+		 *
 		 * hugetlb_no_page will drop vma lock and hugetlb fault
 		 * mutex internally, which make us return immediately.
 		 */
 		return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
 				      entry, flags);
+	}
 
 	ret = 0;
 
@@ -6253,6 +6263,25 @@  int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 	int writable;
 	bool folio_in_pagecache = false;
 
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
+		ptl = huge_pte_lock(h, dst_mm, dst_pte);
+
+		/* Don't overwrite any existing PTEs (even markers) */
+		if (!huge_pte_none(huge_ptep_get(dst_pte))) {
+			spin_unlock(ptl);
+			return -EEXIST;
+		}
+
+		_dst_pte = make_pte_marker(PTE_MARKER_UFFD_POISON);
+		set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+		/* No need to invalidate - it was non-present before */
+		update_mmu_cache(dst_vma, dst_addr, dst_pte);
+
+		spin_unlock(ptl);
+		return 0;
+	}
+
 	if (is_continue) {
 		ret = -EFAULT;
 		folio = filemap_lock_folio(mapping, idx);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 87b62ca1e09e..4436cae1c7a8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -381,12 +381,8 @@  static __always_inline ssize_t mfill_atomic_hugetlb(
 	 * supported by hugetlb.  A PMD_SIZE huge pages may exist as used
 	 * by THP.  Since we can not reliably insert a zero page, this
 	 * feature is not supported.
-	 *
-	 * PTE marker handling for hugetlb is a bit special, so for now
-	 * UFFDIO_POISON is not supported.
 	 */
-	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE) ||
-	    uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) {
+	if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
 		mmap_read_unlock(dst_mm);
 		return -EINVAL;
 	}