[v6,07/12] mmap locking API: add mmap_read_trylock_non_owner()
diff mbox series

Message ID 20200520052908.204642-8-walken@google.com
State New
Headers show
Series
  • Add a new mmap locking API wrapping mmap_sem calls
Related show

Commit Message

Michel Lespinasse May 20, 2020, 5:29 a.m. UTC
Add a couple APIs used by kernel/bpf/stackmap.c only:
- mmap_read_trylock_non_owner()
- mmap_read_unlock_non_owner() (may be called from a work queue).

It's still not ideal that bpf/stackmap subverts the lock ownership
in this way. Thanks to Peter Zijlstra for suggesting this API as the
least-ugly way of addressing this in the short term.

Signed-off-by: Michel Lespinasse <walken@google.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mmap_lock.h | 14 ++++++++++++++
 kernel/bpf/stackmap.c     | 17 +++++------------
 2 files changed, 19 insertions(+), 12 deletions(-)

Comments

Davidlohr Bueso May 22, 2020, 2:15 p.m. UTC | #1
On Tue, 19 May 2020, Michel Lespinasse wrote:

>Add a couple APIs used by kernel/bpf/stackmap.c only:
>- mmap_read_trylock_non_owner()
>- mmap_read_unlock_non_owner() (may be called from a work queue).
>
>It's still not ideal that bpf/stackmap subverts the lock ownership
>in this way. Thanks to Peter Zijlstra for suggesting this API as the
>least-ugly way of addressing this in the short term.
>
>Signed-off-by: Michel Lespinasse <walken@google.com>
>Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
>Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

Sigh, bpf, but ok.

Reviewed-by: Davidlohr Bueso <dbueso@suse.de>

>---
> include/linux/mmap_lock.h | 14 ++++++++++++++
> kernel/bpf/stackmap.c     | 17 +++++------------
> 2 files changed, 19 insertions(+), 12 deletions(-)
>
>diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
>index a757cb30ae77..d1826ce42f00 100644
>--- a/include/linux/mmap_lock.h
>+++ b/include/linux/mmap_lock.h
>@@ -56,4 +56,18 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
> 	up_read(&mm->mmap_sem);
> }
>
>+static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
>+{
>+	if (down_read_trylock(&mm->mmap_sem)) {
>+		rwsem_release(&mm->mmap_sem.dep_map, _RET_IP_);
>+		return true;
>+	}
>+	return false;
>+}
>+
>+static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
>+{
>+	up_read_non_owner(&mm->mmap_sem);
>+}
>+
> #endif /* _LINUX_MMAP_LOCK_H */
>diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>index 11d41f0c7005..998968659892 100644
>--- a/kernel/bpf/stackmap.c
>+++ b/kernel/bpf/stackmap.c
>@@ -33,7 +33,7 @@ struct bpf_stack_map {
> /* irq_work to run up_read() for build_id lookup in nmi context */
> struct stack_map_irq_work {
> 	struct irq_work irq_work;
>-	struct rw_semaphore *sem;
>+	struct mm_struct *mm;
> };
>
> static void do_up_read(struct irq_work *entry)
>@@ -44,8 +44,7 @@ static void do_up_read(struct irq_work *entry)
> 		return;
>
> 	work = container_of(entry, struct stack_map_irq_work, irq_work);
>-	up_read_non_owner(work->sem);
>-	work->sem = NULL;
>+	mmap_read_unlock_non_owner(work->mm);
> }
>
> static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
>@@ -317,7 +316,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> 	 * with build_id.
> 	 */
> 	if (!user || !current || !current->mm || irq_work_busy ||
>-	    mmap_read_trylock(current->mm) == 0) {
>+	    !mmap_read_trylock_non_owner(current->mm)) {
> 		/* cannot access current->mm, fall back to ips */
> 		for (i = 0; i < trace_nr; i++) {
> 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
>@@ -342,16 +341,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> 	}
>
> 	if (!work) {
>-		mmap_read_unlock(current->mm);
>+		mmap_read_unlock_non_owner(current->mm);
> 	} else {
>-		work->sem = &current->mm->mmap_sem;
>+		work->mm = current->mm;
> 		irq_work_queue(&work->irq_work);
>-		/*
>-		 * The irq_work will release the mmap_sem with
>-		 * up_read_non_owner(). The rwsem_release() is called
>-		 * here to release the lock from lockdep's perspective.
>-		 */
>-		rwsem_release(&current->mm->mmap_sem.dep_map, _RET_IP_);
> 	}
> }
>
>-- 
>2.26.2.761.g0e0b3e54be-goog
>

Patch
diff mbox series

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index a757cb30ae77..d1826ce42f00 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -56,4 +56,18 @@  static inline void mmap_read_unlock(struct mm_struct *mm)
 	up_read(&mm->mmap_sem);
 }
 
+static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
+{
+	if (down_read_trylock(&mm->mmap_sem)) {
+		rwsem_release(&mm->mmap_sem.dep_map, _RET_IP_);
+		return true;
+	}
+	return false;
+}
+
+static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
+{
+	up_read_non_owner(&mm->mmap_sem);
+}
+
 #endif /* _LINUX_MMAP_LOCK_H */
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 11d41f0c7005..998968659892 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -33,7 +33,7 @@  struct bpf_stack_map {
 /* irq_work to run up_read() for build_id lookup in nmi context */
 struct stack_map_irq_work {
 	struct irq_work irq_work;
-	struct rw_semaphore *sem;
+	struct mm_struct *mm;
 };
 
 static void do_up_read(struct irq_work *entry)
@@ -44,8 +44,7 @@  static void do_up_read(struct irq_work *entry)
 		return;
 
 	work = container_of(entry, struct stack_map_irq_work, irq_work);
-	up_read_non_owner(work->sem);
-	work->sem = NULL;
+	mmap_read_unlock_non_owner(work->mm);
 }
 
 static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
@@ -317,7 +316,7 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	 * with build_id.
 	 */
 	if (!user || !current || !current->mm || irq_work_busy ||
-	    mmap_read_trylock(current->mm) == 0) {
+	    !mmap_read_trylock_non_owner(current->mm)) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -342,16 +341,10 @@  static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}
 
 	if (!work) {
-		mmap_read_unlock(current->mm);
+		mmap_read_unlock_non_owner(current->mm);
 	} else {
-		work->sem = &current->mm->mmap_sem;
+		work->mm = current->mm;
 		irq_work_queue(&work->irq_work);
-		/*
-		 * The irq_work will release the mmap_sem with
-		 * up_read_non_owner(). The rwsem_release() is called
-		 * here to release the lock from lockdep's perspective.
-		 */
-		rwsem_release(&current->mm->mmap_sem.dep_map, _RET_IP_);
 	}
 }