diff mbox series

[v4,03/11] mm/gup: Applies counting method to monitor gup_pgd_range

Message ID 20190927234008.11513-4-leonardo@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Introduces new count-based method for monitoring lockless pagetable walks | expand

Commit Message

Leonardo Bras Sept. 27, 2019, 11:40 p.m. UTC
As decribed, gup_pgd_range is a lockless pagetable walk. So, in order to
monitor against THP split/collapse with the couting method, it's necessary
to bound it with {start,end}_lockless_pgtbl_walk.

There are dummy functions, so it is not going to add any overhead on archs
that don't use this method.

Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
---
 mm/gup.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Kirill A . Shutemov Sept. 30, 2019, 11:09 a.m. UTC | #1
On Fri, Sep 27, 2019 at 08:40:00PM -0300, Leonardo Bras wrote:
> As decribed, gup_pgd_range is a lockless pagetable walk. So, in order to
     ^ typo
Leonardo Bras Sept. 30, 2019, 2:27 p.m. UTC | #2
On Mon, 2019-09-30 at 14:09 +0300, Kirill A. Shutemov wrote:
> On Fri, Sep 27, 2019 at 08:40:00PM -0300, Leonardo Bras wrote:
> > As decribed, gup_pgd_range is a lockless pagetable walk. So, in order to
>      ^ typo
> 
Fixed, thanks!
John Hubbard Sept. 30, 2019, 9:51 p.m. UTC | #3
On 9/27/19 4:40 PM, Leonardo Bras wrote:
> As decribed, gup_pgd_range is a lockless pagetable walk. So, in order to
> monitor against THP split/collapse with the couting method, it's necessary

s/couting/counting/

> to bound it with {start,end}_lockless_pgtbl_walk.
> 
> There are dummy functions, so it is not going to add any overhead on archs
> that don't use this method.
> 
> Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
> ---
>  mm/gup.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/mm/gup.c b/mm/gup.c
> index 98f13ab37bac..7105c829cf44 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2325,6 +2325,7 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
>  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
>  			  struct page **pages)
>  {
> +	struct mm_struct *mm;

I don't think that this local variable adds any value, so let's not use it.
Similar point in a few other patches too.

>  	unsigned long len, end;
>  	unsigned long flags;
>  	int nr = 0;
> @@ -2352,9 +2353,12 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
>  
>  	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
>  	    gup_fast_permitted(start, end)) {
> +		mm = current->mm;
> +		start_lockless_pgtbl_walk(mm);
>  		local_irq_save(flags);
>  		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
>  		local_irq_restore(flags);
> +		end_lockless_pgtbl_walk(mm);
>  	}
>  
>  	return nr;
> @@ -2404,6 +2408,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
>  			unsigned int gup_flags, struct page **pages)
>  {
>  	unsigned long addr, len, end;
> +	struct mm_struct *mm;

Same here.

>  	int nr = 0, ret = 0;
>  
>  	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM)))
> @@ -2421,9 +2426,12 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
>  
>  	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
>  	    gup_fast_permitted(start, end)) {
> +		mm = current->mm;
> +		start_lockless_pgtbl_walk(mm);

Minor: I'd like to rename this register_lockless_pgtable_walker().

>  		local_irq_disable();
>  		gup_pgd_range(addr, end, gup_flags, pages, &nr);
>  		local_irq_enable();
> +		end_lockless_pgtbl_walk(mm);

...and deregister_lockless_pgtable_walker().


thanks,
Leonardo Bras Oct. 1, 2019, 5:56 p.m. UTC | #4
On Mon, 2019-09-30 at 14:51 -0700, John Hubbard wrote:
> On 9/27/19 4:40 PM, Leonardo Bras wrote:
> > As decribed, gup_pgd_range is a lockless pagetable walk. So, in order to
> > monitor against THP split/collapse with the couting method, it's necessary
> 
> s/couting/counting/
> 

Thanks, fixed for v5.

> > to bound it with {start,end}_lockless_pgtbl_walk.
> > 
> > There are dummy functions, so it is not going to add any overhead on archs
> > that don't use this method.
> > 
> > Signed-off-by: Leonardo Bras <leonardo@linux.ibm.com>
> > ---
> >  mm/gup.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/mm/gup.c b/mm/gup.c
> > index 98f13ab37bac..7105c829cf44 100644
> > --- a/mm/gup.c
> > +++ b/mm/gup.c
> > @@ -2325,6 +2325,7 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
> >  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >  			  struct page **pages)
> >  {
> > +	struct mm_struct *mm;
> 
> I don't think that this local variable adds any value, so let's not use it.
> Similar point in a few other patches too.

It avoids 1 deference of current->mm, it's a little performance gain.

> 
> >  	unsigned long len, end;
> >  	unsigned long flags;
> >  	int nr = 0;
> > @@ -2352,9 +2353,12 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >  
> >  	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
> >  	    gup_fast_permitted(start, end)) {
> > +		mm = current->mm;
> > +		start_lockless_pgtbl_walk(mm);
> >  		local_irq_save(flags);
> >  		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
> >  		local_irq_restore(flags);
> > +		end_lockless_pgtbl_walk(mm);
> >  	}
> >  
> >  	return nr;
> > @@ -2404,6 +2408,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
> >  			unsigned int gup_flags, struct page **pages)
> >  {
> >  	unsigned long addr, len, end;
> > +	struct mm_struct *mm;
> 
> Same here.
> 
> >  	int nr = 0, ret = 0;
> >  
> >  	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM)))
> > @@ -2421,9 +2426,12 @@ int get_user_pages_fast(unsigned long start, int nr_pages,
> >  
> >  	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
> >  	    gup_fast_permitted(start, end)) {
> > +		mm = current->mm;
> > +		start_lockless_pgtbl_walk(mm);
> 
> Minor: I'd like to rename this register_lockless_pgtable_walker().
> 
> >  		local_irq_disable();
> >  		gup_pgd_range(addr, end, gup_flags, pages, &nr);
> >  		local_irq_enable();
> > +		end_lockless_pgtbl_walk(mm);
> 
> ...and deregister_lockless_pgtable_walker().
> 

I have no problem changing the name, but I don't register/deregister
are good terms for this. 

I would rather use start/finish, begin/end, and so on. Register sounds
like something more complicated than what we are trying to achieve
here. 

> 
> thanks,

Thank you!
John Hubbard Oct. 1, 2019, 7:04 p.m. UTC | #5
On 10/1/19 10:56 AM, Leonardo Bras wrote:
> On Mon, 2019-09-30 at 14:51 -0700, John Hubbard wrote:
>> On 9/27/19 4:40 PM, Leonardo Bras wrote:
...
>>> diff --git a/mm/gup.c b/mm/gup.c
>>> index 98f13ab37bac..7105c829cf44 100644
>>> --- a/mm/gup.c
>>> +++ b/mm/gup.c
>>> @@ -2325,6 +2325,7 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
>>>  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
>>>  			  struct page **pages)
>>>  {
>>> +	struct mm_struct *mm;
>>
>> I don't think that this local variable adds any value, so let's not use it.
>> Similar point in a few other patches too.
> 
> It avoids 1 deference of current->mm, it's a little performance gain.
> 

No, it isn't. :) 

Longer answer: at this level (by which I mean, "wrote the C code, haven't looked
at the generated asm yet, and haven't done a direct perf test yet"), none of us
C programmers are entitled to imagine that we can second guess both the compiler 
and the CPU well enough to claim that  declaring a local pointer variable on the
stack will even *affect* performance, much less know which way it will go!

The compiler at -O2 will *absolutely* optimize away any local variables that
it doesn't need.

And that leads to how kernel programmers routinely decide about that kind of 
variable: "does the variable's added clarity compensate for the extra visual 
noise and for the need to manage the variable?"

Here, and in most (all?) other points in the patchset where you've added an
mm local variable, the answer is no.


>>
...	start_lockless_pgtbl_walk(mm);
>>
>> Minor: I'd like to rename this register_lockless_pgtable_walker().
>>
>>>  		local_irq_disable();
>>>  		gup_pgd_range(addr, end, gup_flags, pages, &nr);
>>>  		local_irq_enable();
>>> +		end_lockless_pgtbl_walk(mm);
>>
>> ...and deregister_lockless_pgtable_walker().
>>
> 
> I have no problem changing the name, but I don't register/deregister
> are good terms for this. 
> 
> I would rather use start/finish, begin/end, and so on. Register sounds
> like something more complicated than what we are trying to achieve
> here. 
> 

OK, well, I don't want to bikeshed on naming more than I usually do, and 
what you have is reasonable, so I'll leave that alone. :)

thanks,
Leonardo Bras Oct. 1, 2019, 7:40 p.m. UTC | #6
On Tue, 2019-10-01 at 12:04 -0700, John Hubbard wrote:
> On 10/1/19 10:56 AM, Leonardo Bras wrote:
> > On Mon, 2019-09-30 at 14:51 -0700, John Hubbard wrote:
> > > On 9/27/19 4:40 PM, Leonardo Bras wrote:
> ...
> > > > diff --git a/mm/gup.c b/mm/gup.c
> > > > index 98f13ab37bac..7105c829cf44 100644
> > > > --- a/mm/gup.c
> > > > +++ b/mm/gup.c
> > > > @@ -2325,6 +2325,7 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
> > > >  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > > >  			  struct page **pages)
> > > >  {
> > > > +	struct mm_struct *mm;
> > > 
> > > I don't think that this local variable adds any value, so let's not use it.
> > > Similar point in a few other patches too.
> > 
> > It avoids 1 deference of current->mm, it's a little performance gain.
> > 
> 
> No, it isn't. :) 
> 
> Longer answer: at this level (by which I mean, "wrote the C code, haven't looked
> at the generated asm yet, and haven't done a direct perf test yet"), none of us
> C programmers are entitled to imagine that we can second guess both the compiler 
> and the CPU well enough to claim that  declaring a local pointer variable on the
> stack will even *affect* performance, much less know which way it will go!
> 

I did this based on how costly can be 'current', and I could notice
reduction in assembly size most of the time. (powerpc)
But I get what you mean, maybe the (possible) performance gain don't
worth the extra work.

> The compiler at -O2 will *absolutely* optimize away any local variables that
> it doesn't need.
> 
> And that leads to how kernel programmers routinely decide about that kind of 
> variable: "does the variable's added clarity compensate for the extra visual 
> noise and for the need to manage the variable?"

That's a good way to decide it. :)

> 
> Here, and in most (all?) other points in the patchset where you've added an
> mm local variable, the answer is no.
> 

Well, IMHO it's cleaner that way. But I get that other people may
disagree. 

> 
> ...	start_lockless_pgtbl_walk(mm);
> > > Minor: I'd like to rename this register_lockless_pgtable_walker().
> > > 
> > > >  		local_irq_disable();
> > > >  		gup_pgd_range(addr, end, gup_flags, pages, &nr);
> > > >  		local_irq_enable();
> > > > +		end_lockless_pgtbl_walk(mm);
> > > 
> > > ...and deregister_lockless_pgtable_walker().
> > > 
> > 
> > I have no problem changing the name, but I don't register/deregister
> > are good terms for this. 
> > 
> > I would rather use start/finish, begin/end, and so on. Register sounds
> > like something more complicated than what we are trying to achieve
> > here. 
> > 
> 
> OK, well, I don't want to bikeshed on naming more than I usually do, and 
> what you have is reasonable, so I'll leave that alone. :)
> 
> thanks,

Thank for the feedback,
diff mbox series

Patch

diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bac..7105c829cf44 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2325,6 +2325,7 @@  static bool gup_fast_permitted(unsigned long start, unsigned long end)
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
+	struct mm_struct *mm;
 	unsigned long len, end;
 	unsigned long flags;
 	int nr = 0;
@@ -2352,9 +2353,12 @@  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
 	    gup_fast_permitted(start, end)) {
+		mm = current->mm;
+		start_lockless_pgtbl_walk(mm);
 		local_irq_save(flags);
 		gup_pgd_range(start, end, write ? FOLL_WRITE : 0, pages, &nr);
 		local_irq_restore(flags);
+		end_lockless_pgtbl_walk(mm);
 	}
 
 	return nr;
@@ -2404,6 +2408,7 @@  int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages)
 {
 	unsigned long addr, len, end;
+	struct mm_struct *mm;
 	int nr = 0, ret = 0;
 
 	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM)))
@@ -2421,9 +2426,12 @@  int get_user_pages_fast(unsigned long start, int nr_pages,
 
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) &&
 	    gup_fast_permitted(start, end)) {
+		mm = current->mm;
+		start_lockless_pgtbl_walk(mm);
 		local_irq_disable();
 		gup_pgd_range(addr, end, gup_flags, pages, &nr);
 		local_irq_enable();
+		end_lockless_pgtbl_walk(mm);
 		ret = nr;
 	}