diff mbox series

[2/3] hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer

Message ID 20221005011707.514612-3-mike.kravetz@oracle.com (mailing list archive)
State New
Headers show
Series hugetlb: fixes for new vma lock series | expand

Commit Message

Mike Kravetz Oct. 5, 2022, 1:17 a.m. UTC
hugetlb file truncation/hole punch code may need to back out and take
locks in order in the routine hugetlb_unmap_file_folio().  This code
could race with vma freeing as pointed out in [1] and result in
accessing a stale vma pointer.  To address this, take the vma_lock when
clearing the vma_lock->vma pointer.

[1] https://lore.kernel.org/linux-mm/01f10195-7088-4462-6def-909549c75ef4@huawei.com/

Fixes: "hugetlb: use new vma_lock for pmd sharing synchronization"
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

Comments

kernel test robot Oct. 5, 2022, 3:48 a.m. UTC | #1
Hi Mike,

I love your patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on next-20221004]
[cannot apply to linus/master v6.0]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mike-Kravetz/hugetlb-fixes-for-new-vma-lock-series/20221005-091913
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: s390-defconfig
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/49523e6ee01b312c4eebea201b3ac31836fb1227
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Mike-Kravetz/hugetlb-fixes-for-new-vma-lock-series/20221005-091913
        git checkout 49523e6ee01b312c4eebea201b3ac31836fb1227
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   s390-linux-ld: mm/hugetlb.o: in function `__unmap_hugepage_range_final':
>> mm/hugetlb.c:5196: undefined reference to `__hugetlb_vma_unlock_write_free'
   pahole: .tmp_vmlinux.btf: No such file or directory
   .btf.vmlinux.bin.o: file not recognized: file format not recognized


vim +5196 mm/hugetlb.c

  5177	
  5178	void __unmap_hugepage_range_final(struct mmu_gather *tlb,
  5179				  struct vm_area_struct *vma, unsigned long start,
  5180				  unsigned long end, struct page *ref_page,
  5181				  zap_flags_t zap_flags)
  5182	{
  5183		hugetlb_vma_lock_write(vma);
  5184		i_mmap_lock_write(vma->vm_file->f_mapping);
  5185	
  5186		__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
  5187	
  5188		/*
  5189		 * Unlock and free the vma lock before releasing i_mmap_rwsem.  When
  5190		 * the vma_lock is freed, this makes the vma ineligible for pmd
  5191		 * sharing.  And, i_mmap_rwsem is required to set up pmd sharing.
  5192		 * This is important as page tables for this unmapped range will
  5193		 * be asynchrously deleted.  If the page tables are shared, there
  5194		 * will be issues when accessed by someone else.
  5195		 */
> 5196		__hugetlb_vma_unlock_write_free(vma);
  5197	
  5198		i_mmap_unlock_write(vma->vm_file->f_mapping);
  5199	}
  5200
kernel test robot Oct. 5, 2022, 6:58 a.m. UTC | #2
Hi Mike,

I love your patch! Perhaps something to improve:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on next-20221004]
[cannot apply to linus/master v6.0]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mike-Kravetz/hugetlb-fixes-for-new-vma-lock-series/20221005-091913
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: i386-randconfig-a003-20221003
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/49523e6ee01b312c4eebea201b3ac31836fb1227
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Mike-Kravetz/hugetlb-fixes-for-new-vma-lock-series/20221005-091913
        git checkout 49523e6ee01b312c4eebea201b3ac31836fb1227
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> mm/hugetlb.c:6945:6: warning: no previous prototype for function '__hugetlb_vma_unlock_write_put' [-Wmissing-prototypes]
   void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
        ^
   mm/hugetlb.c:6945:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
   ^
   static 
   1 warning generated.


vim +/__hugetlb_vma_unlock_write_put +6945 mm/hugetlb.c

  6944	
> 6945	void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
  6946	{
  6947		struct vm_area_struct *vma = vma_lock->vma;
  6948	
  6949		/*
  6950		 * vma_lock structure may or not be released as a result of put,
  6951		 * it certainly will no longer be attached to vma so clear pointer.
  6952		 * Semaphore synchronizes access to vma_lock->vma field.
  6953		 */
  6954		vma_lock->vma = NULL;
  6955		vma->vm_private_data = NULL;
  6956		up_write(&vma_lock->rw_sema);
  6957		kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
  6958	}
  6959
Mike Kravetz Oct. 6, 2022, 3:30 a.m. UTC | #3
To address build issues:


From b6d359f77d32c7734fe4f00806a0ed1e99925534 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 5 Oct 2022 20:26:19 -0700
Subject: [PATCH 2/3] hugetlb: take hugetlb vma_lock when clearing
 vma_lock->vma pointer

hugetlb file truncation/hole punch code may need to back out and take
locks in order in the routine hugetlb_unmap_file_folio().  This code
could race with vma freeing as pointed out in [1] and result in
accessing a stale vma pointer.  To address this, take the vma_lock when
clearing the vma_lock->vma pointer.

[1] https://lore.kernel.org/linux-mm/01f10195-7088-4462-6def-909549c75ef4@huawei.com/

Fixes: "hugetlb: use new vma_lock for pmd sharing synchronization"
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
---
 mm/hugetlb.c | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38c45aafe00f..e9194ca6d44b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -93,6 +93,7 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
+static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -5192,8 +5193,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 	 * be asynchrously deleted.  If the page tables are shared, there
 	 * will be issues when accessed by someone else.
 	 */
-	hugetlb_vma_unlock_write(vma);
-	hugetlb_vma_lock_free(vma);
+	__hugetlb_vma_unlock_write_free(vma);
 
 	i_mmap_unlock_write(vma->vm_file->f_mapping);
 }
@@ -6942,6 +6942,30 @@ void hugetlb_vma_lock_release(struct kref *kref)
 	kfree(vma_lock);
 }
 
+void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
+{
+	struct vm_area_struct *vma = vma_lock->vma;
+
+	/*
+	 * vma_lock structure may or not be released as a result of put,
+	 * it certainly will no longer be attached to vma so clear pointer.
+	 * Semaphore synchronizes access to vma_lock->vma field.
+	 */
+	vma_lock->vma = NULL;
+	vma->vm_private_data = NULL;
+	up_write(&vma_lock->rw_sema);
+	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+}
+
+static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
+{
+	if (__vma_shareable_flags_pmd(vma)) {
+		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+		__hugetlb_vma_unlock_write_put(vma_lock);
+	}
+}
+
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 {
 	/*
@@ -6953,14 +6977,8 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 	if (vma->vm_private_data) {
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-		/*
-		 * vma_lock structure may or not be released, but it
-		 * certainly will no longer be attached to vma so clear
-		 * pointer.
-		 */
-		vma_lock->vma = NULL;
-		kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
-		vma->vm_private_data = NULL;
+		down_write(&vma_lock->rw_sema);
+		__hugetlb_vma_unlock_write_put(vma_lock);
 	}
 }
 
@@ -7111,6 +7129,10 @@ void hugetlb_vma_lock_release(struct kref *kref)
 {
 }
 
+static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
+{
+}
+
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 {
 }
Miaohe Lin Oct. 15, 2022, 1:32 a.m. UTC | #4
On 2022/10/6 11:30, Mike Kravetz wrote:
> To address build issues:
> 
> 
>>From b6d359f77d32c7734fe4f00806a0ed1e99925534 Mon Sep 17 00:00:00 2001
> From: Mike Kravetz <mike.kravetz@oracle.com>
> Date: Wed, 5 Oct 2022 20:26:19 -0700
> Subject: [PATCH 2/3] hugetlb: take hugetlb vma_lock when clearing
>  vma_lock->vma pointer
> 
> hugetlb file truncation/hole punch code may need to back out and take
> locks in order in the routine hugetlb_unmap_file_folio().  This code
> could race with vma freeing as pointed out in [1] and result in
> accessing a stale vma pointer.  To address this, take the vma_lock when
> clearing the vma_lock->vma pointer.
> 
> [1] https://lore.kernel.org/linux-mm/01f10195-7088-4462-6def-909549c75ef4@huawei.com/
> 
> Fixes: "hugetlb: use new vma_lock for pmd sharing synchronization"
> Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>

This patch looks good to me. Thanks.

Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>

Thanks,
Miaohe Lin
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0129d371800c..388a32b089bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -93,6 +93,7 @@  struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 static int hugetlb_acct_memory(struct hstate *h, long delta);
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
 static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
+static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -5188,8 +5189,7 @@  void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 	 * be asynchrously deleted.  If the page tables are shared, there
 	 * will be issues when accessed by someone else.
 	 */
-	hugetlb_vma_unlock_write(vma);
-	hugetlb_vma_lock_free(vma);
+	__hugetlb_vma_unlock_write_free(vma);
 
 	i_mmap_unlock_write(vma->vm_file->f_mapping);
 }
@@ -6894,6 +6894,30 @@  void hugetlb_vma_lock_release(struct kref *kref)
 	kfree(vma_lock);
 }
 
+void __hugetlb_vma_unlock_write_put(struct hugetlb_vma_lock *vma_lock)
+{
+	struct vm_area_struct *vma = vma_lock->vma;
+
+	/*
+	 * vma_lock structure may or not be released as a result of put,
+	 * it certainly will no longer be attached to vma so clear pointer.
+	 * Semaphore synchronizes access to vma_lock->vma field.
+	 */
+	vma_lock->vma = NULL;
+	vma->vm_private_data = NULL;
+	up_write(&vma_lock->rw_sema);
+	kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
+}
+
+void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma)
+{
+	if (__vma_shareable_flags_pmd(vma)) {
+		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
+
+		__hugetlb_vma_unlock_write_put(vma_lock);
+	}
+}
+
 static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 {
 	/*
@@ -6905,14 +6929,8 @@  static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
 	if (vma->vm_private_data) {
 		struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;
 
-		/*
-		 * vma_lock structure may or not be released, but it
-		 * certainly will no longer be attached to vma so clear
-		 * pointer.
-		 */
-		vma_lock->vma = NULL;
-		kref_put(&vma_lock->refs, hugetlb_vma_lock_release);
-		vma->vm_private_data = NULL;
+		down_write(&vma_lock->rw_sema);
+		__hugetlb_vma_unlock_write_put(vma_lock);
 	}
 }