diff mbox series

[v5,19/26] hugetlb/userfaultfd: Handle UFFDIO_WRITEPROTECT

Message ID 20210715201630.211865-1-peterx@redhat.com (mailing list archive)
State New
Headers show
Series userfaultfd-wp: Support shmem and hugetlbfs | expand

Commit Message

Peter Xu July 15, 2021, 8:16 p.m. UTC
This starts from passing cp_flags into hugetlb_change_protection() so hugetlb
will be able to handle MM_CP_UFFD_WP[_RESOLVE] requests.

huge_pte_clear_uffd_wp() is introduced to handle the case where
UFFDIO_WRITEPROTECT is requested upon migrating huge page entries.

Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/hugetlb.h |  6 ++++--
 mm/hugetlb.c            | 13 ++++++++++++-
 mm/mprotect.c           |  3 ++-
 mm/userfaultfd.c        |  8 ++++++++
 4 files changed, 26 insertions(+), 4 deletions(-)

Comments

kernel test robot July 21, 2021, 8:24 a.m. UTC | #1
Hi Peter,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on kselftest/next]
[also build test ERROR on linus/master v5.14-rc2 next-20210720]
[cannot apply to hnaz-linux-mm/master asm-generic/master arm64/for-next/core linux/master tip/x86/core]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Peter-Xu/userfaultfd-wp-Support-shmem-and-hugetlbfs/20210716-041947
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
config: s390-randconfig-r023-20210716 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 5d5b08761f944d5b9822d582378333cc4b36a0a7)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install s390 cross compiling tool for clang build
        # apt-get install binutils-s390x-linux-gnu
        # https://github.com/0day-ci/linux/commit/23779145f29982887db86a44763fa794325c479f
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Peter-Xu/userfaultfd-wp-Support-shmem-and-hugetlbfs/20210716-041947
        git checkout 23779145f29982887db86a44763fa794325c479f
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=s390 

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from mm/hugetlb.c:19:
   In file included from include/linux/memblock.h:14:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __raw_readb(PCI_IOBASE + addr);
                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:36:59: note: expanded from macro '__le16_to_cpu'
   #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
                                                             ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
   #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
                                                        ^
   In file included from mm/hugetlb.c:19:
   In file included from include/linux/memblock.h:14:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
                                                           ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:34:59: note: expanded from macro '__le32_to_cpu'
   #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
                                                             ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
   #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
                                                        ^
   In file included from mm/hugetlb.c:19:
   In file included from include/linux/memblock.h:14:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from arch/s390/include/asm/io.h:75:
   include/asm-generic/io.h:501:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writeb(value, PCI_IOBASE + addr);
                               ~~~~~~~~~~ ^
   include/asm-generic/io.h:511:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:521:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
                                                         ~~~~~~~~~~ ^
   include/asm-generic/io.h:609:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsb(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:617:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsw(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:625:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           readsl(PCI_IOBASE + addr, buffer, count);
                  ~~~~~~~~~~ ^
   include/asm-generic/io.h:634:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesb(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:643:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesw(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   include/asm-generic/io.h:652:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
           writesl(PCI_IOBASE + addr, buffer, count);
                   ~~~~~~~~~~ ^
   mm/hugetlb.c:5063:29: error: implicit declaration of function 'huge_pte_uffd_wp' [-Werror,-Wimplicit-function-declaration]
           if (userfaultfd_wp(vma) && huge_pte_uffd_wp(huge_ptep_get(ptep)) &&
                                      ^
   mm/hugetlb.c:5301:14: error: implicit declaration of function 'huge_pte_mkuffd_wp' [-Werror,-Wimplicit-function-declaration]
                   _dst_pte = huge_pte_mkuffd_wp(_dst_pte);
                              ^
   mm/hugetlb.c:5301:14: note: did you mean 'pte_mkuffd_wp'?
   include/asm-generic/pgtable_uffd.h:18:30: note: 'pte_mkuffd_wp' declared here
   static __always_inline pte_t pte_mkuffd_wp(pte_t pte)
                                ^
   mm/hugetlb.c:5301:12: error: assigning to 'pte_t' from incompatible type 'int'
                   _dst_pte = huge_pte_mkuffd_wp(_dst_pte);
                            ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/hugetlb.c:5595:11: error: implicit declaration of function 'huge_pte_mkuffd_wp' [-Werror,-Wimplicit-function-declaration]
                                   pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
                                         ^
   mm/hugetlb.c:5595:9: error: assigning to 'pte_t' from incompatible type 'int'
                                   pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
                                       ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> mm/hugetlb.c:5597:11: error: implicit declaration of function 'huge_pte_clear_uffd_wp' [-Werror,-Wimplicit-function-declaration]
                                   pte = huge_pte_clear_uffd_wp(pte);
                                         ^
   mm/hugetlb.c:5597:11: note: did you mean 'pte_clear_uffd_wp'?
   include/asm-generic/pgtable_uffd.h:28:30: note: 'pte_clear_uffd_wp' declared here
   static __always_inline pte_t pte_clear_uffd_wp(pte_t pte)
                                ^
   mm/hugetlb.c:5597:9: error: assigning to 'pte_t' from incompatible type 'int'
                                   pte = huge_pte_clear_uffd_wp(pte);
                                       ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   12 warnings and 7 errors generated.


vim +/huge_pte_clear_uffd_wp +5597 mm/hugetlb.c

  5520	
  5521	unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
  5522			unsigned long address, unsigned long end,
  5523			pgprot_t newprot, unsigned long cp_flags)
  5524	{
  5525		struct mm_struct *mm = vma->vm_mm;
  5526		unsigned long start = address;
  5527		pte_t *ptep;
  5528		pte_t pte;
  5529		struct hstate *h = hstate_vma(vma);
  5530		unsigned long pages = 0;
  5531		bool shared_pmd = false;
  5532		struct mmu_notifier_range range;
  5533		bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
  5534		bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
  5535	
  5536		/*
  5537		 * In the case of shared PMDs, the area to flush could be beyond
  5538		 * start/end.  Set range.start/range.end to cover the maximum possible
  5539		 * range if PMD sharing is possible.
  5540		 */
  5541		mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA,
  5542					0, vma, mm, start, end);
  5543		adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
  5544	
  5545		BUG_ON(address >= end);
  5546		flush_cache_range(vma, range.start, range.end);
  5547	
  5548		mmu_notifier_invalidate_range_start(&range);
  5549		i_mmap_lock_write(vma->vm_file->f_mapping);
  5550		for (; address < end; address += huge_page_size(h)) {
  5551			spinlock_t *ptl;
  5552			ptep = huge_pte_offset(mm, address, huge_page_size(h));
  5553			if (!ptep)
  5554				continue;
  5555			ptl = huge_pte_lock(h, mm, ptep);
  5556			if (huge_pmd_unshare(mm, vma, &address, ptep)) {
  5557				pages++;
  5558				spin_unlock(ptl);
  5559				shared_pmd = true;
  5560				continue;
  5561			}
  5562			pte = huge_ptep_get(ptep);
  5563			if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
  5564				spin_unlock(ptl);
  5565				continue;
  5566			}
  5567			if (unlikely(is_hugetlb_entry_migration(pte))) {
  5568				swp_entry_t entry = pte_to_swp_entry(pte);
  5569	
  5570				if (is_writable_migration_entry(entry)) {
  5571					pte_t newpte;
  5572	
  5573					entry = make_readable_migration_entry(
  5574								swp_offset(entry));
  5575					newpte = swp_entry_to_pte(entry);
  5576					if (uffd_wp)
  5577						newpte = pte_swp_mkuffd_wp(newpte);
  5578					else if (uffd_wp_resolve)
  5579						newpte = pte_swp_clear_uffd_wp(newpte);
  5580					set_huge_swap_pte_at(mm, address, ptep,
  5581							     newpte, huge_page_size(h));
  5582					pages++;
  5583				}
  5584				spin_unlock(ptl);
  5585				continue;
  5586			}
  5587			if (!huge_pte_none(pte)) {
  5588				pte_t old_pte;
  5589				unsigned int shift = huge_page_shift(hstate_vma(vma));
  5590	
  5591				old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
  5592				pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
  5593				pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
  5594				if (uffd_wp)
  5595					pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
  5596				else if (uffd_wp_resolve)
> 5597					pte = huge_pte_clear_uffd_wp(pte);
  5598				huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
  5599				pages++;
  5600			}
  5601			spin_unlock(ptl);
  5602		}
  5603		/*
  5604		 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
  5605		 * may have cleared our pud entry and done put_page on the page table:
  5606		 * once we release i_mmap_rwsem, another task can do the final put_page
  5607		 * and that page table be reused and filled with junk.  If we actually
  5608		 * did unshare a page of pmds, flush the range corresponding to the pud.
  5609		 */
  5610		if (shared_pmd)
  5611			flush_hugetlb_tlb_range(vma, range.start, range.end);
  5612		else
  5613			flush_hugetlb_tlb_range(vma, start, end);
  5614		/*
  5615		 * No need to call mmu_notifier_invalidate_range() we are downgrading
  5616		 * page table protection not changing it to point to a new page.
  5617		 *
  5618		 * See Documentation/vm/mmu_notifier.rst
  5619		 */
  5620		i_mmap_unlock_write(vma->vm_file->f_mapping);
  5621		mmu_notifier_invalidate_range_end(&range);
  5622	
  5623		return pages << h->order;
  5624	}
  5625	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fcdbf9f46d85..e19ca363803d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -205,7 +205,8 @@  struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
 int pmd_huge(pmd_t pmd);
 int pud_huge(pud_t pud);
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
-		unsigned long address, unsigned long end, pgprot_t newprot);
+		unsigned long address, unsigned long end, pgprot_t newprot,
+		unsigned long cp_flags);
 
 bool is_hugetlb_entry_migration(pte_t pte);
 void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
@@ -372,7 +373,8 @@  static inline void move_hugetlb_state(struct page *oldpage,
 
 static inline unsigned long hugetlb_change_protection(
 			struct vm_area_struct *vma, unsigned long address,
-			unsigned long end, pgprot_t newprot)
+			unsigned long end, pgprot_t newprot,
+			unsigned long cp_flags)
 {
 	return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 880cb2137d04..4edb3ee885ea 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5519,7 +5519,8 @@  long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
-		unsigned long address, unsigned long end, pgprot_t newprot)
+		unsigned long address, unsigned long end,
+		pgprot_t newprot, unsigned long cp_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long start = address;
@@ -5529,6 +5530,8 @@  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	unsigned long pages = 0;
 	bool shared_pmd = false;
 	struct mmu_notifier_range range;
+	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
+	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
 	/*
 	 * In the case of shared PMDs, the area to flush could be beyond
@@ -5570,6 +5573,10 @@  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 				entry = make_readable_migration_entry(
 							swp_offset(entry));
 				newpte = swp_entry_to_pte(entry);
+				if (uffd_wp)
+					newpte = pte_swp_mkuffd_wp(newpte);
+				else if (uffd_wp_resolve)
+					newpte = pte_swp_clear_uffd_wp(newpte);
 				set_huge_swap_pte_at(mm, address, ptep,
 						     newpte, huge_page_size(h));
 				pages++;
@@ -5584,6 +5591,10 @@  unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
 			pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
 			pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
+			if (uffd_wp)
+				pte = huge_pte_mkuffd_wp(huge_pte_wrprotect(pte));
+			else if (uffd_wp_resolve)
+				pte = huge_pte_clear_uffd_wp(pte);
 			huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
 			pages++;
 		}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 3fcb87b59696..96f4df023439 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -426,7 +426,8 @@  unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
 	BUG_ON((cp_flags & MM_CP_UFFD_WP_ALL) == MM_CP_UFFD_WP_ALL);
 
 	if (is_vm_hugetlb_page(vma))
-		pages = hugetlb_change_protection(vma, start, end, newprot);
+		pages = hugetlb_change_protection(vma, start, end, newprot,
+						  cp_flags);
 	else
 		pages = change_protection_range(vma, start, end, newprot,
 						cp_flags);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 501d6b9f7a5a..7ba721aca1c5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -695,6 +695,7 @@  int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 			unsigned long len, bool enable_wp, bool *mmap_changing)
 {
 	struct vm_area_struct *dst_vma;
+	unsigned long page_mask;
 	pgprot_t newprot;
 	int err;
 
@@ -731,6 +732,13 @@  int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 	if (!vma_is_anonymous(dst_vma))
 		goto out_unlock;
 
+	if (is_vm_hugetlb_page(dst_vma)) {
+		err = -EINVAL;
+		page_mask = vma_kernel_pagesize(dst_vma) - 1;
+		if ((start & page_mask) || (len & page_mask))
+			goto out_unlock;
+	}
+
 	if (enable_wp)
 		newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
 	else