diff mbox series

[RFC,05/19] RDMA/siw: Convert to use vm_account

Message ID 4f8b1d54ab5d6909c46ba5470065f8326f9fcc62.1674538665.git-series.apopple@nvidia.com (mailing list archive)
State New
Headers show
Series mm: Introduce a cgroup to limit the amount of locked and pinned memory | expand

Commit Message

Alistair Popple Jan. 24, 2023, 5:42 a.m. UTC
Convert to using a vm_account structure to account pinned memory to
both the mm and the pins cgroup.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: linux-rdma@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_mem.c   | 20 ++++++--------------
 drivers/infiniband/sw/siw/siw_verbs.c | 15 ---------------
 3 files changed, 7 insertions(+), 30 deletions(-)

Comments

Jason Gunthorpe Jan. 24, 2023, 2:37 p.m. UTC | #1
On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:

> @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
>  	if (!umem)
>  		return ERR_PTR(-ENOMEM);
>  
> -	mm_s = current->mm;
> -	umem->owning_mm = mm_s;
>  	umem->writable = writable;
>  
> -	mmgrab(mm_s);
> +	vm_account_init_current(&umem->vm_account);
>  
>  	if (writable)
>  		foll_flags |= FOLL_WRITE;
>  
> -	mmap_read_lock(mm_s);
> +	mmap_read_lock(current->mm);
>  
> -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> -
> -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
>  		rv = -ENOMEM;
>  		goto out_sem_up;
>  	}
> @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
>  				goto out_sem_up;
>  
>  			umem->num_pages += rv;
> -			atomic64_add(rv, &mm_s->pinned_vm);

Also fixes the race bug

Jason
Bernard Metzler Jan. 24, 2023, 3:22 p.m. UTC | #2
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> 
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  	if (!umem)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	mm_s = current->mm;
> > -	umem->owning_mm = mm_s;
> >  	umem->writable = writable;
> >
> > -	mmgrab(mm_s);
> > +	vm_account_init_current(&umem->vm_account);
> >
> >  	if (writable)
> >  		foll_flags |= FOLL_WRITE;
> >
> > -	mmap_read_lock(mm_s);
> > +	mmap_read_lock(current->mm);
> >
> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >  		rv = -ENOMEM;
> >  		goto out_sem_up;
> >  	}
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  				goto out_sem_up;
> >
> >  			umem->num_pages += rv;
> > -			atomic64_add(rv, &mm_s->pinned_vm);
> 
> Also fixes the race bug
> 
True! Should have used atomic64_add_return() in the first place...

Bernard.
Bernard Metzler Jan. 24, 2023, 3:56 p.m. UTC | #3
> -----Original Message-----
> From: Jason Gunthorpe <jgg@nvidia.com>
> Sent: Tuesday, 24 January 2023 15:37
> To: Alistair Popple <apopple@nvidia.com>
> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> 
> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  	if (!umem)
> >  		return ERR_PTR(-ENOMEM);
> >
> > -	mm_s = current->mm;
> > -	umem->owning_mm = mm_s;
> >  	umem->writable = writable;
> >
> > -	mmgrab(mm_s);
> > +	vm_account_init_current(&umem->vm_account);
> >
> >  	if (writable)
> >  		foll_flags |= FOLL_WRITE;
> >
> > -	mmap_read_lock(mm_s);
> > +	mmap_read_lock(current->mm);
> >
> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> > -
> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >  		rv = -ENOMEM;
> >  		goto out_sem_up;
> >  	}
> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> bool writable)
> >  				goto out_sem_up;
> >
> >  			umem->num_pages += rv;
> > -			atomic64_add(rv, &mm_s->pinned_vm);
> 
> Also fixes the race bug

But introduces another one. In that loop, umem->num_pages keeps the
number of pages currently pinned, not the target number. The current
patch uses that umem->num_pages to call vm_unaccount_pinned() in
siw_umem_release(). Bailing out before all pages are pinned would
mess up that accounting during release. Maybe introduce another
parameter to siw_umem_release(), or better have another umem member
'umem->num_pages_accounted' for correct accounting during release.

Bernard.
> 
> Jason
Alistair Popple Jan. 30, 2023, 11:34 a.m. UTC | #4
Bernard Metzler <BMT@zurich.ibm.com> writes:

>> -----Original Message-----
>> From: Jason Gunthorpe <jgg@nvidia.com>
>> Sent: Tuesday, 24 January 2023 15:37
>> To: Alistair Popple <apopple@nvidia.com>
>> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
>> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
>> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com; daniel@ffwll.ch;
>> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
>> linux-rdma@vger.kernel.org
>> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
>> vm_account
>> 
>> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
>> 
>> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> >  	if (!umem)
>> >  		return ERR_PTR(-ENOMEM);
>> >
>> > -	mm_s = current->mm;
>> > -	umem->owning_mm = mm_s;
>> >  	umem->writable = writable;
>> >
>> > -	mmgrab(mm_s);
>> > +	vm_account_init_current(&umem->vm_account);
>> >
>> >  	if (writable)
>> >  		foll_flags |= FOLL_WRITE;
>> >
>> > -	mmap_read_lock(mm_s);
>> > +	mmap_read_lock(current->mm);
>> >
>> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>> > -
>> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
>> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
>> >  		rv = -ENOMEM;
>> >  		goto out_sem_up;
>> >  	}
>> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
>> bool writable)
>> >  				goto out_sem_up;
>> >
>> >  			umem->num_pages += rv;
>> > -			atomic64_add(rv, &mm_s->pinned_vm);
>> 
>> Also fixes the race bug
>
> But introduces another one. In that loop, umem->num_pages keeps the
> number of pages currently pinned, not the target number. The current
> patch uses that umem->num_pages to call vm_unaccount_pinned() in
> siw_umem_release(). Bailing out before all pages are pinned would
> mess up that accounting during release. Maybe introduce another
> parameter to siw_umem_release(), or better have another umem member
> 'umem->num_pages_accounted' for correct accounting during release.

Yes, I see the problem thanks for pointing it out. Will fix for the next
version.

> Bernard.
>> 
>> Jason
Bernard Metzler Jan. 30, 2023, 1:27 p.m. UTC | #5
> -----Original Message-----
> From: Alistair Popple <apopple@nvidia.com>
> Sent: Monday, 30 January 2023 12:35
> To: Bernard Metzler <BMT@zurich.ibm.com>
> Cc: Jason Gunthorpe <jgg@nvidia.com>; linux-mm@kvack.org;
> cgroups@vger.kernel.org; linux-kernel@vger.kernel.org; jhubbard@nvidia.com;
> tjmercier@google.com; hannes@cmpxchg.org; surenb@google.com;
> mkoutny@suse.com; daniel@ffwll.ch; Leon Romanovsky <leon@kernel.org>;
> linux-rdma@vger.kernel.org
> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> vm_account
> 
> 
> Bernard Metzler <BMT@zurich.ibm.com> writes:
> 
> >> -----Original Message-----
> >> From: Jason Gunthorpe <jgg@nvidia.com>
> >> Sent: Tuesday, 24 January 2023 15:37
> >> To: Alistair Popple <apopple@nvidia.com>
> >> Cc: linux-mm@kvack.org; cgroups@vger.kernel.org; linux-
> >> kernel@vger.kernel.org; jhubbard@nvidia.com; tjmercier@google.com;
> >> hannes@cmpxchg.org; surenb@google.com; mkoutny@suse.com;
> daniel@ffwll.ch;
> >> Bernard Metzler <BMT@zurich.ibm.com>; Leon Romanovsky <leon@kernel.org>;
> >> linux-rdma@vger.kernel.org
> >> Subject: [EXTERNAL] Re: [RFC PATCH 05/19] RMDA/siw: Convert to use
> >> vm_account
> >>
> >> On Tue, Jan 24, 2023 at 04:42:34PM +1100, Alistair Popple wrote:
> >>
> >> > @@ -385,20 +382,16 @@ struct siw_umem *siw_umem_get(u64 start, u64
> len,
> >> bool writable)
> >> >  	if (!umem)
> >> >  		return ERR_PTR(-ENOMEM);
> >> >
> >> > -	mm_s = current->mm;
> >> > -	umem->owning_mm = mm_s;
> >> >  	umem->writable = writable;
> >> >
> >> > -	mmgrab(mm_s);
> >> > +	vm_account_init_current(&umem->vm_account);
> >> >
> >> >  	if (writable)
> >> >  		foll_flags |= FOLL_WRITE;
> >> >
> >> > -	mmap_read_lock(mm_s);
> >> > +	mmap_read_lock(current->mm);
> >> >
> >> > -	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
> >> > -
> >> > -	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
> >> > +	if (vm_account_pinned(&umem->vm_account, num_pages)) {
> >> >  		rv = -ENOMEM;
> >> >  		goto out_sem_up;
> >> >  	}
> >> > @@ -429,7 +422,6 @@ struct siw_umem *siw_umem_get(u64 start, u64 len,
> >> bool writable)
> >> >  				goto out_sem_up;
> >> >
> >> >  			umem->num_pages += rv;
> >> > -			atomic64_add(rv, &mm_s->pinned_vm);
> >>
> >> Also fixes the race bug
> >
> > But introduces another one. In that loop, umem->num_pages keeps the
> > number of pages currently pinned, not the target number. The current
> > patch uses that umem->num_pages to call vm_unaccount_pinned() in
> > siw_umem_release(). Bailing out before all pages are pinned would
> > mess up that accounting during release. Maybe introduce another
> > parameter to siw_umem_release(), or better have another umem member
> > 'umem->num_pages_accounted' for correct accounting during release.
> 
> Yes, I see the problem thanks for pointing it out. Will fix for the next
> version.

Thank you! Let me send a patch to the original code,
just checking if not all pages are pinned and fix the
counter accordingly. Maybe you can go from there..?

Thank you,
Bernard.
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 2f3a9cd..0c4a3ec 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -124,7 +124,7 @@  struct siw_umem {
 	int num_pages;
 	bool writable;
 	u64 fp_addr; /* First page base address */
-	struct mm_struct *owning_mm;
+	struct vm_account vm_account;
 };
 
 struct siw_pble {
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index b2b33dd..9c53fc3 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -68,7 +68,6 @@  static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
 
 void siw_umem_release(struct siw_umem *umem, bool dirty)
 {
-	struct mm_struct *mm_s = umem->owning_mm;
 	int i, num_pages = umem->num_pages;
 
 	for (i = 0; num_pages; i++) {
@@ -79,9 +78,9 @@  void siw_umem_release(struct siw_umem *umem, bool dirty)
 		kfree(umem->page_chunk[i].plist);
 		num_pages -= to_free;
 	}
-	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
+	vm_unaccount_pinned(&umem->vm_account, umem->num_pages);
+	vm_account_release(&umem->vm_account);
 
-	mmdrop(mm_s);
 	kfree(umem->page_chunk);
 	kfree(umem);
 }
@@ -365,9 +364,7 @@  struct siw_pbl *siw_pbl_alloc(u32 num_buf)
 struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 {
 	struct siw_umem *umem;
-	struct mm_struct *mm_s;
 	u64 first_page_va;
-	unsigned long mlock_limit;
 	unsigned int foll_flags = FOLL_LONGTERM;
 	int num_pages, num_chunks, i, rv = 0;
 
@@ -385,20 +382,16 @@  struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
-	mm_s = current->mm;
-	umem->owning_mm = mm_s;
 	umem->writable = writable;
 
-	mmgrab(mm_s);
+	vm_account_init_current(&umem->vm_account);
 
 	if (writable)
 		foll_flags |= FOLL_WRITE;
 
-	mmap_read_lock(mm_s);
+	mmap_read_lock(current->mm);
 
-	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
+	if (vm_account_pinned(&umem->vm_account, num_pages)) {
 		rv = -ENOMEM;
 		goto out_sem_up;
 	}
@@ -429,7 +422,6 @@  struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 				goto out_sem_up;
 
 			umem->num_pages += rv;
-			atomic64_add(rv, &mm_s->pinned_vm);
 			first_page_va += rv * PAGE_SIZE;
 			nents -= rv;
 			got += rv;
@@ -437,7 +429,7 @@  struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 		num_pages -= got;
 	}
 out_sem_up:
-	mmap_read_unlock(mm_s);
+	mmap_read_unlock(current->mm);
 
 	if (rv > 0)
 		return umem;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 906fde1..8fab009 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1321,8 +1321,6 @@  struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	struct siw_umem *umem = NULL;
 	struct siw_ureq_reg_mr ureq;
 	struct siw_device *sdev = to_siw_dev(pd->device);
-
-	unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
 	int rv;
 
 	siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,19 +1336,6 @@  struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 		rv = -EINVAL;
 		goto err_out;
 	}
-	if (mem_limit != RLIM_INFINITY) {
-		unsigned long num_pages =
-			(PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
-		mem_limit >>= PAGE_SHIFT;
-
-		if (num_pages > mem_limit - current->mm->locked_vm) {
-			siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
-				   num_pages, mem_limit,
-				   current->mm->locked_vm);
-			rv = -ENOMEM;
-			goto err_out;
-		}
-	}
 	umem = siw_umem_get(start, len, ib_access_writable(rights));
 	if (IS_ERR(umem)) {
 		rv = PTR_ERR(umem);