diff mbox series

[RFC] IB/core: Ensure an invalidate_range callback on ODP MR

Message ID 20190304184108.29340-1-ira.weiny@intel.com (mailing list archive)
State Superseded
Headers show
Series [RFC] IB/core: Ensure an invalidate_range callback on ODP MR | expand

Commit Message

Ira Weiny March 4, 2019, 6:41 p.m. UTC
From: Ira Weiny <ira.weiny@intel.com>

No device supports ODP MR without an invalidate_range callback.

Warn on any any device which attempts to support ODP without
supplying this callback.

Then we can remove the checks for the callback within the code.

This stems from the discussion

https://www.spinics.net/lists/linux-rdma/msg76460.html

...which concluded this code was no longer necessary.

CC: Haggai Eran <haggaie@mellanox.com>
CC: Artemy Kovalyov <artemyko@mellanox.com>
CC: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/infiniband/core/umem.c     |  5 +++++
 drivers/infiniband/core/umem_odp.c | 11 ++++-------
 2 files changed, 9 insertions(+), 7 deletions(-)

Comments

John Hubbard March 6, 2019, 12:06 a.m. UTC | #1
On 3/4/19 10:41 AM, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> No device supports ODP MR without an invalidate_range callback.
> 
> Warn on any any device which attempts to support ODP without
> supplying this callback.
> 
> Then we can remove the checks for the callback within the code.
> 
> This stems from the discussion
> 
> https://www.spinics.net/lists/linux-rdma/msg76460.html
> 
> ...which concluded this code was no longer necessary.
> 
> CC: Haggai Eran <haggaie@mellanox.com>
> CC: Artemy Kovalyov <artemyko@mellanox.com>
> CC: John Hubbard <jhubbard@nvidia.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> ---
>  drivers/infiniband/core/umem.c     |  5 +++++
>  drivers/infiniband/core/umem_odp.c | 11 ++++-------
>  2 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
> index fe5551562dbc..89a7d57f9fa5 100644
> --- a/drivers/infiniband/core/umem.c
> +++ b/drivers/infiniband/core/umem.c
> @@ -138,6 +138,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>  	mmgrab(mm);
>  
>  	if (access & IB_ACCESS_ON_DEMAND) {
> +		if (WARN_ON_ONCE(!context->invalidate_range)) {
> +			ret = -EINVAL;
> +			goto umem_kfree;
> +		}
> +
>  		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
>  		if (ret)
>  			goto umem_kfree;
> diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
> index 6013cf0b8f4f..aa7f95633581 100644
> --- a/drivers/infiniband/core/umem_odp.c
> +++ b/drivers/infiniband/core/umem_odp.c
> @@ -240,7 +240,7 @@ static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
>  	per_mm->mm = mm;
>  	per_mm->umem_tree = RB_ROOT_CACHED;
>  	init_rwsem(&per_mm->umem_rwsem);
> -	per_mm->active = ctx->invalidate_range;
> +	per_mm->active = true;
>  
>  	rcu_read_lock();
>  	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
> @@ -539,11 +539,10 @@ static int ib_umem_odp_map_dma_single_page(
>  	}
>  
>  out:
> -	/* On Demand Paging - avoid pinning the page */
> -	if (umem->context->invalidate_range || !stored_page)
> +	if (!stored_page)
>  		put_page(page);

Hi Ira,

This would introduce a change in behavior, because previously, the first
side of the "or" phrase was effectively always true. In other words, the
code has, so far, been just doing this:

		put_page();

...unconditionally. (Again, because "umem->context->invalidate_range" was
always true. The "!stored_page" never got even evaluated.)

In other words, if a == true, you cannot replace "a || b" with "b". You have
to replace it with "true".

I don't know what the !stored_page is about without looking more closely,
but to avoid regressions you'd be safely with just doing an unconditional
put_page(), I think.


thanks,
Haggai Eran March 6, 2019, 7:53 a.m. UTC | #2
On 3/6/2019 2:06 AM, John Hubbard wrote:
> On 3/4/19 10:41 AM, ira.weiny@intel.com wrote:
>> From: Ira Weiny <ira.weiny@intel.com>
>>
>> No device supports ODP MR without an invalidate_range callback.
>>
>> Warn on any any device which attempts to support ODP without
>> supplying this callback.
>>
>> Then we can remove the checks for the callback within the code.
>>
>> This stems from the discussion
>>
>> https://www.spinics.net/lists/linux-rdma/msg76460.html
>>
>> ...which concluded this code was no longer necessary.
>>
>> CC: Haggai Eran <haggaie@mellanox.com>
>> CC: Artemy Kovalyov <artemyko@mellanox.com>
>> CC: John Hubbard <jhubbard@nvidia.com>
>> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>> ---
>>   drivers/infiniband/core/umem.c     |  5 +++++
>>   drivers/infiniband/core/umem_odp.c | 11 ++++-------
>>   2 files changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
>> index fe5551562dbc..89a7d57f9fa5 100644
>> --- a/drivers/infiniband/core/umem.c
>> +++ b/drivers/infiniband/core/umem.c
>> @@ -138,6 +138,11 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
>>   	mmgrab(mm);
>>   
>>   	if (access & IB_ACCESS_ON_DEMAND) {
>> +		if (WARN_ON_ONCE(!context->invalidate_range)) {
>> +			ret = -EINVAL;
>> +			goto umem_kfree;
>> +		}
>> +
>>   		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
>>   		if (ret)
>>   			goto umem_kfree;
>> diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
>> index 6013cf0b8f4f..aa7f95633581 100644
>> --- a/drivers/infiniband/core/umem_odp.c
>> +++ b/drivers/infiniband/core/umem_odp.c
>> @@ -240,7 +240,7 @@ static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
>>   	per_mm->mm = mm;
>>   	per_mm->umem_tree = RB_ROOT_CACHED;
>>   	init_rwsem(&per_mm->umem_rwsem);
>> -	per_mm->active = ctx->invalidate_range;
>> +	per_mm->active = true;
>>   
>>   	rcu_read_lock();
>>   	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
>> @@ -539,11 +539,10 @@ static int ib_umem_odp_map_dma_single_page(
>>   	}
>>   
>>   out:
>> -	/* On Demand Paging - avoid pinning the page */
>> -	if (umem->context->invalidate_range || !stored_page)
>> +	if (!stored_page)
>>   		put_page(page);
> 
> Hi Ira,
> 
> This would introduce a change in behavior, because previously, the first
> side of the "or" phrase was effectively always true. In other words, the
> code has, so far, been just doing this:
> 
> 		put_page();
> 
> ...unconditionally. (Again, because "umem->context->invalidate_range" was
> always true. The "!stored_page" never got even evaluated.)
> 
> In other words, if a == true, you cannot replace "a || b" with "b". You have
> to replace it with "true".
> 
> I don't know what the !stored_page is about without looking more closely,
> but to avoid regressions you'd be safely with just doing an unconditional
> put_page(), I think.

Right. stored_page is there for on-demand pinning, to release the 
reference on the page in case the page was already in the page list. You 
can also remove stored_page along with this patch.

> @@ -748,9 +747,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
>  				 */
>  				set_page_dirty(head_page);
>  			}
> -			/* on demand pinning support */
> -			if (!umem->context->invalidate_range)
> -				put_page(page);
> +			put_page(page);
>  			umem_odp->page_list[idx] = NULL;
>  			umem_odp->dma_list[idx] = 0;
>  			umem->npages--;

I think this is also wrong. If invalidate range is always expected to be 
valid when calling unmap_dmap_pages, then put_page(page) is never 
called, so you can just remove it.

Regards,
Haggai
Jason Gunthorpe March 6, 2019, 6:28 p.m. UTC | #3
On Wed, Mar 06, 2019 at 03:28:19PM +0000, Weiny, Ira wrote:
> > On 3/6/2019 2:06 AM, John Hubbard wrote:
> > > On 3/4/19 10:41 AM, ira.weiny@intel.com wrote:
> > >> From: Ira Weiny <ira.weiny@intel.com>
> > >>
> > >> No device supports ODP MR without an invalidate_range callback.
> > >>
> > >> Warn on any any device which attempts to support ODP without
> > >> supplying this callback.
> > >>
> > >> Then we can remove the checks for the callback within the code.
> > >>
> > >> This stems from the discussion
> > >>
> > >> https://www.spinics.net/lists/linux-rdma/msg76460.html
> > >>
> > >> ...which concluded this code was no longer necessary.
> > >>
> > >> CC: Haggai Eran <haggaie@mellanox.com>
> > >> CC: Artemy Kovalyov <artemyko@mellanox.com>
> > >> CC: John Hubbard <jhubbard@nvidia.com>
> > >> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > >>   drivers/infiniband/core/umem.c     |  5 +++++
> > >>   drivers/infiniband/core/umem_odp.c | 11 ++++-------
> > >>   2 files changed, 9 insertions(+), 7 deletions(-)
> > >>
> > >> diff --git a/drivers/infiniband/core/umem.c
> > >> b/drivers/infiniband/core/umem.c index fe5551562dbc..89a7d57f9fa5
> > >> 100644
> > >> +++ b/drivers/infiniband/core/umem.c
> > >> @@ -138,6 +138,11 @@ struct ib_umem *ib_umem_get(struct ib_udata
> > *udata, unsigned long addr,
> > >>   	mmgrab(mm);
> > >>
> > >>   	if (access & IB_ACCESS_ON_DEMAND) {
> > >> +		if (WARN_ON_ONCE(!context->invalidate_range)) {
> > >> +			ret = -EINVAL;
> > >> +			goto umem_kfree;
> > >> +		}
> > >> +
> > >>   		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
> > >>   		if (ret)
> > >>   			goto umem_kfree;
> > >> diff --git a/drivers/infiniband/core/umem_odp.c
> > >> b/drivers/infiniband/core/umem_odp.c
> > >> index 6013cf0b8f4f..aa7f95633581 100644
> > >> +++ b/drivers/infiniband/core/umem_odp.c
> > >> @@ -240,7 +240,7 @@ static struct ib_ucontext_per_mm
> > *alloc_per_mm(struct ib_ucontext *ctx,
> > >>   	per_mm->mm = mm;
> > >>   	per_mm->umem_tree = RB_ROOT_CACHED;
> > >>   	init_rwsem(&per_mm->umem_rwsem);
> > >> -	per_mm->active = ctx->invalidate_range;
> > >> +	per_mm->active = true;
> > >>
> > >>   	rcu_read_lock();
> > >>   	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
> > >> @@ -539,11 +539,10 @@ static int
> > ib_umem_odp_map_dma_single_page(
> > >>   	}
> > >>
> > >>   out:
> > >> -	/* On Demand Paging - avoid pinning the page */
> > >> -	if (umem->context->invalidate_range || !stored_page)
> > >> +	if (!stored_page)
> > >>   		put_page(page);
> > >
> > > Hi Ira,
> > >
> > > This would introduce a change in behavior, because previously, the
> > > first side of the "or" phrase was effectively always true. In other
> > > words, the code has, so far, been just doing this:
> > >
> > > 		put_page();
> > >
> > > ...unconditionally. (Again, because "umem->context->invalidate_range"
> > > was always true. The "!stored_page" never got even evaluated.)
> > >
> > > In other words, if a == true, you cannot replace "a || b" with "b".
> > > You have to replace it with "true".
> > >
> > > I don't know what the !stored_page is about without looking more
> > > closely, but to avoid regressions you'd be safely with just doing an
> > > unconditional put_page(), I think.
> > 
> > Right. stored_page is there for on-demand pinning, to release the reference
> > on the page in case the page was already in the page list. You can also
> > remove stored_page along with this patch.
> 
> Good catch I kind of glossed over this one.  Thanks.
> 
> > 
> > > @@ -748,9 +747,7 @@ void ib_umem_odp_unmap_dma_pages(struct
> > ib_umem_odp *umem_odp, u64 virt,
> > >  				 */
> > >  				set_page_dirty(head_page);
> > >  			}
> > > -			/* on demand pinning support */
> > > -			if (!umem->context->invalidate_range)
> > > -				put_page(page);
> > > +			put_page(page);
> > >  			umem_odp->page_list[idx] = NULL;
> > >  			umem_odp->dma_list[idx] = 0;
> > >  			umem->npages--;
> > 
> > I think this is also wrong. If invalidate range is always expected to be valid
> > when calling unmap_dmap_pages, then put_page(page) is never called, so
> > you can just remove it.
> 
> I agree with the analysis of the logic but then where is the
> put_page which matches the get_user_pages_remote() call in
> ib_umem_odp_map_dmap_pages()?

Isn't it in ib_umem_odp_map_dma_pages (for tail pages) and
ib_umem_odp_map_dma_single_page (for the head page)?

> Effectively, I believe it was a bug to not call put_page here.  I
> suppose I should have put that in the commit message...  :-/

Separate patch. :)

Jason
Ira Weiny March 6, 2019, 7:25 p.m. UTC | #4
> On Wed, Mar 06, 2019 at 03:28:19PM +0000, Weiny, Ira wrote:
> > > On 3/6/2019 2:06 AM, John Hubbard wrote:
> > > > On 3/4/19 10:41 AM, ira.weiny@intel.com wrote:
> > > >> From: Ira Weiny <ira.weiny@intel.com>
> > > >>
> > > >> No device supports ODP MR without an invalidate_range callback.
> > > >>
> > > >> Warn on any any device which attempts to support ODP without
> > > >> supplying this callback.
> > > >>
> > > >> Then we can remove the checks for the callback within the code.
> > > >>
> > > >> This stems from the discussion
> > > >>
> > > >> https://www.spinics.net/lists/linux-rdma/msg76460.html
> > > >>
> > > >> ...which concluded this code was no longer necessary.
> > > >>
> > > >> CC: Haggai Eran <haggaie@mellanox.com>
> > > >> CC: Artemy Kovalyov <artemyko@mellanox.com>
> > > >> CC: John Hubbard <jhubbard@nvidia.com>
> > > >> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > > >>   drivers/infiniband/core/umem.c     |  5 +++++
> > > >>   drivers/infiniband/core/umem_odp.c | 11 ++++-------
> > > >>   2 files changed, 9 insertions(+), 7 deletions(-)
> > > >>
> > > >> diff --git a/drivers/infiniband/core/umem.c
> > > >> b/drivers/infiniband/core/umem.c index
> fe5551562dbc..89a7d57f9fa5
> > > >> 100644
> > > >> +++ b/drivers/infiniband/core/umem.c
> > > >> @@ -138,6 +138,11 @@ struct ib_umem *ib_umem_get(struct
> ib_udata
> > > *udata, unsigned long addr,
> > > >>   	mmgrab(mm);
> > > >>
> > > >>   	if (access & IB_ACCESS_ON_DEMAND) {
> > > >> +		if (WARN_ON_ONCE(!context->invalidate_range)) {
> > > >> +			ret = -EINVAL;
> > > >> +			goto umem_kfree;
> > > >> +		}
> > > >> +
> > > >>   		ret = ib_umem_odp_get(to_ib_umem_odp(umem),
> access);
> > > >>   		if (ret)
> > > >>   			goto umem_kfree;
> > > >> diff --git a/drivers/infiniband/core/umem_odp.c
> > > >> b/drivers/infiniband/core/umem_odp.c
> > > >> index 6013cf0b8f4f..aa7f95633581 100644
> > > >> +++ b/drivers/infiniband/core/umem_odp.c
> > > >> @@ -240,7 +240,7 @@ static struct ib_ucontext_per_mm
> > > *alloc_per_mm(struct ib_ucontext *ctx,
> > > >>   	per_mm->mm = mm;
> > > >>   	per_mm->umem_tree = RB_ROOT_CACHED;
> > > >>   	init_rwsem(&per_mm->umem_rwsem);
> > > >> -	per_mm->active = ctx->invalidate_range;
> > > >> +	per_mm->active = true;
> > > >>
> > > >>   	rcu_read_lock();
> > > >>   	per_mm->tgid = get_task_pid(current->group_leader,
> > > >> PIDTYPE_PID); @@ -539,11 +539,10 @@ static int
> > > ib_umem_odp_map_dma_single_page(
> > > >>   	}
> > > >>
> > > >>   out:
> > > >> -	/* On Demand Paging - avoid pinning the page */
> > > >> -	if (umem->context->invalidate_range || !stored_page)
> > > >> +	if (!stored_page)
> > > >>   		put_page(page);
> > > >
> > > > Hi Ira,
> > > >
> > > > This would introduce a change in behavior, because previously, the
> > > > first side of the "or" phrase was effectively always true. In
> > > > other words, the code has, so far, been just doing this:
> > > >
> > > > 		put_page();
> > > >
> > > > ...unconditionally. (Again, because "umem->context->invalidate_range"
> > > > was always true. The "!stored_page" never got even evaluated.)
> > > >
> > > > In other words, if a == true, you cannot replace "a || b" with "b".
> > > > You have to replace it with "true".
> > > >
> > > > I don't know what the !stored_page is about without looking more
> > > > closely, but to avoid regressions you'd be safely with just doing
> > > > an unconditional put_page(), I think.
> > >
> > > Right. stored_page is there for on-demand pinning, to release the
> > > reference on the page in case the page was already in the page list.
> > > You can also remove stored_page along with this patch.
> >
> > Good catch I kind of glossed over this one.  Thanks.
> >
> > >
> > > > @@ -748,9 +747,7 @@ void ib_umem_odp_unmap_dma_pages(struct
> > > ib_umem_odp *umem_odp, u64 virt,
> > > >  				 */
> > > >  				set_page_dirty(head_page);
> > > >  			}
> > > > -			/* on demand pinning support */
> > > > -			if (!umem->context->invalidate_range)
> > > > -				put_page(page);
> > > > +			put_page(page);
> > > >  			umem_odp->page_list[idx] = NULL;
> > > >  			umem_odp->dma_list[idx] = 0;
> > > >  			umem->npages--;
> > >
> > > I think this is also wrong. If invalidate range is always expected
> > > to be valid when calling unmap_dmap_pages, then put_page(page) is
> > > never called, so you can just remove it.
> >
> > I agree with the analysis of the logic but then where is the put_page
> > which matches the get_user_pages_remote() call in
> > ib_umem_odp_map_dmap_pages()?
> 
> Isn't it in ib_umem_odp_map_dma_pages (for tail pages) and
> ib_umem_odp_map_dma_single_page (for the head page)?

Ok I see it now.

> 
> > Effectively, I believe it was a bug to not call put_page here.  I
> > suppose I should have put that in the commit message...  :-/
> 
> Separate patch. :)

No need now...

Again I can't test but I'll send out another RFC V2.

Ira

> 
> Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe5551562dbc..89a7d57f9fa5 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -138,6 +138,11 @@  struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	mmgrab(mm);
 
 	if (access & IB_ACCESS_ON_DEMAND) {
+		if (WARN_ON_ONCE(!context->invalidate_range)) {
+			ret = -EINVAL;
+			goto umem_kfree;
+		}
+
 		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
 		if (ret)
 			goto umem_kfree;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6013cf0b8f4f..aa7f95633581 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -240,7 +240,7 @@  static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
 	per_mm->mm = mm;
 	per_mm->umem_tree = RB_ROOT_CACHED;
 	init_rwsem(&per_mm->umem_rwsem);
-	per_mm->active = ctx->invalidate_range;
+	per_mm->active = true;
 
 	rcu_read_lock();
 	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
@@ -539,11 +539,10 @@  static int ib_umem_odp_map_dma_single_page(
 	}
 
 out:
-	/* On Demand Paging - avoid pinning the page */
-	if (umem->context->invalidate_range || !stored_page)
+	if (!stored_page)
 		put_page(page);
 
-	if (remove_existing_mapping && umem->context->invalidate_range) {
+	if (remove_existing_mapping) {
 		ib_umem_notifier_start_account(umem_odp);
 		umem->context->invalidate_range(
 			umem_odp,
@@ -748,9 +747,7 @@  void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 				 */
 				set_page_dirty(head_page);
 			}
-			/* on demand pinning support */
-			if (!umem->context->invalidate_range)
-				put_page(page);
+			put_page(page);
 			umem_odp->page_list[idx] = NULL;
 			umem_odp->dma_list[idx] = 0;
 			umem->npages--;