
[6/6] drivers/IB,core: reduce scope of mmap_sem

Message ID: 20190121174220.10583-7-dave@stgolabs.net (mailing list archive)
State: Superseded
Delegated to: Jason Gunthorpe
Series: mm: make pinned_vm atomic and simplify users

Commit Message

Davidlohr Bueso Jan. 21, 2019, 5:42 p.m. UTC
ib_umem_get() uses gup_longterm() and relies on the lock to
stabilize the vma_list, so we cannot really get rid of mmap_sem
altogether, but now that the counter is atomic, we can get rid of
the complexity that mmap_sem brought in solely for pinned_vm.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 drivers/infiniband/core/umem.c | 41 ++---------------------------------------
 1 file changed, 2 insertions(+), 39 deletions(-)
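
The core of the change is in the first hunk below: the read/check/store
sequence that needed mmap_sem for serialization becomes a single atomic
add-then-check, with a rollback of the charge on failure. A minimal,
self-contained userspace sketch of that pattern (C11 atomics standing in
for atomic64_t; charge_pinned()/uncharge_pinned() are illustrative names,
not kernel functions):

/*
 * Illustration only (userspace C11, not the kernel code): the same
 * optimistic charge / check / rollback pattern that ib_umem_get()
 * now applies to mm->pinned_vm, with no lock around the accounting.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic int64_t pinned_vm;        /* stands in for mm->pinned_vm */

static bool charge_pinned(int64_t npages, int64_t lock_limit)
{
        /* Charge first (like atomic64_add_return()), then check the limit. */
        int64_t new_pinned = atomic_fetch_add(&pinned_vm, npages) + npages;

        if (new_pinned > lock_limit) {   /* the kernel also honours CAP_IPC_LOCK */
                atomic_fetch_sub(&pinned_vm, npages);   /* roll the charge back */
                return false;
        }
        return true;     /* caller must uncharge if the pin fails later on */
}

static void uncharge_pinned(int64_t npages)
{
        atomic_fetch_sub(&pinned_vm, npages);
}

int main(void)
{
        const int64_t limit = 1024;

        printf("charge 1000 -> %d\n", charge_pinned(1000, limit));  /* 1: fits */
        printf("charge  100 -> %d\n", charge_pinned(100, limit));   /* 0: over */
        uncharge_pinned(1000);
        return 0;
}

Unlike the locked version, the counter can transiently exceed the limit
between the add and the rollback, but no caller ever completes a pin while
over the limit, which is what makes dropping mmap_sem here safe.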

Comments

Jason Gunthorpe Jan. 21, 2019, 6:32 p.m. UTC | #1
On Mon, Jan 21, 2019 at 09:42:20AM -0800, Davidlohr Bueso wrote:
> ib_umem_get() uses gup_longterm() and relies on the lock to
> stabilize the vma_list, so we cannot really get rid of mmap_sem
> altogether, but now that the counter is atomic, we can get rid of
> the complexity that mmap_sem brought in solely for pinned_vm.
> 
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  drivers/infiniband/core/umem.c | 41 ++---------------------------------------
>  1 file changed, 2 insertions(+), 39 deletions(-)

I think this addresses my comment..

Considering that it is almost all infiniband, I'd rather it go
through the RDMA tree with an ack from mm people? Please advise.

Thanks,
Jason
Davidlohr Bueso Jan. 21, 2019, 7:12 p.m. UTC | #2
On Mon, 21 Jan 2019, Jason Gunthorpe wrote:

>On Mon, Jan 21, 2019 at 09:42:20AM -0800, Davidlohr Bueso wrote:
>> ib_umem_get() uses gup_longterm() and relies on the lock to
>> stabilize the vma_list, so we cannot really get rid of mmap_sem
>> altogether, but now that the counter is atomic, we can get rid of
>> the complexity that mmap_sem brought in solely for pinned_vm.
>>
>> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
>> ---
>>  drivers/infiniband/core/umem.c | 41 ++---------------------------------------
>>  1 file changed, 2 insertions(+), 39 deletions(-)
>
>I think this addresses my comment..
>
>Considering that it is almost all infiniband, I'd rather it go
>through the RDMA tree with an ack from mm people? Please advise.

Yeah also Cc'ing Willy who I forgot to add for v2.

>
>Thanks,
>Jason
Christoph Lameter (Ampere) Jan. 21, 2019, 9:53 p.m. UTC | #3
On Mon, 21 Jan 2019, Davidlohr Bueso wrote:

> ib_umem_get() uses gup_longterm() and relies on the lock to
> stabilize the vma_list, so we cannot really get rid of mmap_sem
> altogether, but now that the counter is atomic, we can get rid of
> the complexity that mmap_sem brought in solely for pinned_vm.

Reviewed-by: Christoph Lameter <cl@linux.com>

Patch

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 678abe1afcba..b69d3efa8712 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -165,15 +165,12 @@  struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	down_write(&mm->mmap_sem);
-	new_pinned = atomic64_read(&mm->pinned_vm) + npages;
+	new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
 	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
-		up_write(&mm->mmap_sem);
+		atomic64_sub(npages, &mm->pinned_vm);
 		ret = -ENOMEM;
 		goto out;
 	}
-	atomic64_set(&mm->pinned_vm, new_pinned);
-	up_write(&mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
 
@@ -233,9 +230,7 @@  struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 umem_release:
 	__ib_umem_release(context->device, umem, 0);
 vma:
-	down_write(&mm->mmap_sem);
 	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
-	up_write(&mm->mmap_sem);
 out:
 	if (vma_list)
 		free_page((unsigned long) vma_list);
@@ -258,25 +253,12 @@  static void __ib_umem_release_tail(struct ib_umem *umem)
 		kfree(umem);
 }
 
-static void ib_umem_release_defer(struct work_struct *work)
-{
-	struct ib_umem *umem = container_of(work, struct ib_umem, work);
-
-	down_write(&umem->owning_mm->mmap_sem);
-	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
-	up_write(&umem->owning_mm->mmap_sem);
-
-	__ib_umem_release_tail(umem);
-}
-
 /**
  * ib_umem_release - release memory pinned with ib_umem_get
  * @umem: umem struct to release
  */
 void ib_umem_release(struct ib_umem *umem)
 {
-	struct ib_ucontext *context = umem->context;
-
 	if (umem->is_odp) {
 		ib_umem_odp_release(to_ib_umem_odp(umem));
 		__ib_umem_release_tail(umem);
@@ -285,26 +267,7 @@  void ib_umem_release(struct ib_umem *umem)
 
 	__ib_umem_release(umem->context->device, umem, 1);
 
-	/*
-	 * We may be called with the mm's mmap_sem already held.  This
-	 * can happen when a userspace munmap() is the call that drops
-	 * the last reference to our file and calls our release
-	 * method.  If there are memory regions to destroy, we'll end
-	 * up here and not be able to take the mmap_sem.  In that case
-	 * we defer the vm_locked accounting a workqueue.
-	 */
-	if (context->closing) {
-		if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
-			INIT_WORK(&umem->work, ib_umem_release_defer);
-			queue_work(ib_wq, &umem->work);
-			return;
-		}
-	} else {
-		down_write(&umem->owning_mm->mmap_sem);
-	}
 	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
-	up_write(&umem->owning_mm->mmap_sem);
-
 	__ib_umem_release_tail(umem);
 }
 EXPORT_SYMBOL(ib_umem_release);
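
For readability, this is how ib_umem_release() reads with the hunks above
applied (a simplified reconstruction from the diff, relying on the helpers
already in drivers/infiniband/core/umem.c, not a standalone snippet). The
uncharge is now a plain atomic64_sub(), so it is safe even when the release
is driven by a munmap() that already holds mmap_sem, which is why the
ib_umem_release_defer() workqueue indirection could be deleted:

void ib_umem_release(struct ib_umem *umem)
{
        if (umem->is_odp) {
                ib_umem_odp_release(to_ib_umem_odp(umem));
                __ib_umem_release_tail(umem);
                return;
        }

        __ib_umem_release(umem->context->device, umem, 1);

        /* No lock needed: the rlimit was already enforced at pin time. */
        atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
        __ib_umem_release_tail(umem);
}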