
[RFC,17/24] x86 fault handler: implement range locking

Message ID: 20200224203057.162467-18-walken@google.com
State New, archived
Series: Fine grained MM locking

Commit Message

Michel Lespinasse Feb. 24, 2020, 8:30 p.m. UTC
Change the x86 fault handler to implement range locking.

Initially we try to lock a pmd-sized range around the faulting address,
which is appropriate for anon vmas. After finding the correct vma for
the faulting address, we verify that it is anonymous and fall back to
a coarse-grained lock if it is not. If the fine-grained lock is workable,
we copy the vma of record into a pseudo-vma and release the mm_vma_lock
before handling the fault.

Signed-off-by: Michel Lespinasse <walken@google.com>
---
 arch/x86/mm/fault.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)
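
[Editor's note] The locking protocol this patch establishes in do_user_addr_fault() can be summarized with the outline below. It only reuses helper names that appear in this series (mm_init_lock_range(), mm_range_is_coarse(), mm_vma_lock()/mm_vma_unlock(), mm_read_range_unlock(), mm_coarse_lock_range(), prepare_mm_fault()); the mm_read_range_lock() call at the retry: label and the placement of the final unlock are assumptions about context not visible in the hunks below, so treat this as a control-flow sketch rather than compilable kernel code.

	/*
	 * Sketch of the fault-path locking after this patch; not the
	 * literal kernel code. mm_read_range_lock() at retry: is assumed
	 * from the rest of the series and does not appear in this hunk.
	 */
	struct mm_lock_range pmd_range, *range;
	struct vm_area_struct pvma, *vma;
	vm_fault_t fault;

	/* 1. Optimistically pick a pmd-sized range around the fault. */
	mm_init_lock_range(&pmd_range, address & PMD_MASK,
			   (address & PMD_MASK) + PMD_SIZE);
	range = &pmd_range;

retry:
	mm_read_range_lock(mm, range);		/* assumed, taken at retry: */

	/*
	 * 2. A fine-grained range does not stabilize the vma tree, so the
	 *    lookup (and expand_stack) must run under mm_vma_lock().
	 */
	if (!mm_range_is_coarse(range))
		mm_vma_lock(mm);
	vma = find_vma(mm, address);
	/* ... access checks and expand_stack() happen here ... */

	if (!mm_range_is_coarse(range)) {
		/*
		 * 3. Allocate anon_vma on the vma of record, then snapshot
		 *    it into a pseudo-vma so the vma tree lock can be
		 *    dropped.
		 */
		fault = prepare_mm_fault(vma, flags);
		pvma = *vma;
		vma = &pvma;
		mm_vma_unlock(mm);
		if (fault)
			goto got_fault;

		/*
		 * 4. Only anonymous vmas are handled under the fine-grained
		 *    range; otherwise fall back to the coarse whole-mm range
		 *    and redo the lookup.
		 */
		if (!vma_is_anonymous(vma)) {
			mm_read_range_unlock(mm, range);
			range = mm_coarse_lock_range();
			goto retry;
		}
	}

	/*
	 * 5. The fault is handled against vma (the pseudo-vma in the
	 *    fine-grained case).
	 */
	fault = handle_mm_fault(vma, address, flags);

got_fault:
	/*
	 * Fault result processing (retry, signals) happens here, in code
	 * not shown in this patch, before the range is finally unlocked.
	 */
	mm_read_range_unlock(mm, range);

The key point is the lock order: the range lock is taken first, mm_vma_lock() covers only the vma tree walk, and the pseudo-vma copy is what allows dropping mm_vma_lock() before the potentially blocking fault handling.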

Patch

diff --git arch/x86/mm/fault.c arch/x86/mm/fault.c
index 52333272e14e..1e37284d373c 100644
--- arch/x86/mm/fault.c
+++ arch/x86/mm/fault.c
@@ -941,6 +941,7 @@  bad_area(struct pt_regs *regs, unsigned long error_code,
 	 unsigned long address, struct vm_area_struct *vma,
 	 struct mm_lock_range *range)
 {
+	struct mm_struct *mm;
 	u32 pkey = 0;
 	int si_code = SEGV_MAPERR;
 
@@ -984,7 +985,10 @@  bad_area(struct pt_regs *regs, unsigned long error_code,
 	 * Something tried to access memory that isn't in our memory map..
 	 * Fix it, but check if it's kernel or user first..
 	 */
-	mm_read_range_unlock(current->mm, range);
+	mm = current->mm;
+	if (!mm_range_is_coarse(range))
+		mm_vma_unlock(mm);
+	mm_read_range_unlock(mm, range);
 
 	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
 }
@@ -1278,7 +1282,7 @@  void do_user_addr_fault(struct pt_regs *regs,
 			unsigned long hw_error_code,
 			unsigned long address)
 {
-	struct mm_lock_range *range;
+	struct mm_lock_range pmd_range, *range;
 	struct vm_area_struct pvma, *vma;
 	struct task_struct *tsk;
 	struct mm_struct *mm;
@@ -1363,7 +1367,10 @@  void do_user_addr_fault(struct pt_regs *regs,
 	}
 #endif
 
-	range = mm_coarse_lock_range();
+	mm_init_lock_range(&pmd_range,
+			   address & PMD_MASK,
+			   (address & PMD_MASK) + PMD_SIZE);
+	range = &pmd_range;
 
 	/*
 	 * Kernel-mode access to the user address space should only occur
@@ -1397,6 +1404,8 @@  void do_user_addr_fault(struct pt_regs *regs,
 		might_sleep();
 	}
 
+	if (!mm_range_is_coarse(range))
+		mm_vma_lock(mm);
 	vma = find_vma(mm, address);
 	if (unlikely(!vma)) {
 		bad_area(regs, hw_error_code, address, NULL, range);
@@ -1408,6 +1417,10 @@  void do_user_addr_fault(struct pt_regs *regs,
 		bad_area(regs, hw_error_code, address, NULL, range);
 		return;
 	}
+	/*
+	 * Note that if range is fine grained, we can still safely call
+	 * expand_stack as we are protected by the mm_vma_lock().
+	 */
 	if (unlikely(expand_stack(vma, address))) {
 		bad_area(regs, hw_error_code, address, NULL, range);
 		return;
@@ -1423,23 +1436,34 @@  void do_user_addr_fault(struct pt_regs *regs,
 		return;
 	}
 
-	if (vma_is_anonymous(vma)) {
+	if (!mm_range_is_coarse(range)) {
 		/*
 		 * Allocate anon_vma if needed.
 		 * This needs to operate on the vma of record.
 		 */
 		fault = prepare_mm_fault(vma, flags);
-		if (fault)
-			goto got_fault;
 
 		/*
 		 * Copy vma attributes into a pseudo-vma.
-		 * This will be required when using fine grained locks.
+		 * The vma of record is only valid until mm_vma_unlock().
 		 */
 		pvma = *vma;
 		vma = &pvma;
-	}
+		mm_vma_unlock(mm);
 
+		if (fault)
+			goto got_fault;
+
+		/*
+		 * Fall back to locking the entire MM
+		 * when operating on file vma.
+		 */
+		if (!vma_is_anonymous(vma)) {
+			mm_read_range_unlock(mm, range);
+			range = mm_coarse_lock_range();
+			goto retry;
+		}
+	}
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo