diff mbox series

[v3,2/7] mm: thp: introduce transhuge_vma_size_ok() helper

Message ID 20220606214414.736109-3-shy828301@gmail.com (mailing list archive)
State New
Headers show
Series Cleanup transhuge_xxx helpers | expand

Commit Message

Yang Shi June 6, 2022, 9:44 p.m. UTC
There are a couple of places that check whether the vma size is ok for
THP or not. They are open coded and duplicated; introduce the
transhuge_vma_size_ok() helper to do the job.

Signed-off-by: Yang Shi <shy828301@gmail.com>
---
 include/linux/huge_mm.h | 17 +++++++++++++++++
 mm/huge_memory.c        |  5 +----
 mm/khugepaged.c         | 12 ++++++------
 3 files changed, 24 insertions(+), 10 deletions(-)

Comments

Zach O'Keefe June 9, 2022, 10:21 p.m. UTC | #1
On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@gmail.com> wrote:
>
> There are couple of places that check whether the vma size is ok for
> THP or not, they are open coded and duplicate, introduce
> transhuge_vma_size_ok() helper to do the job.
>
> Signed-off-by: Yang Shi <shy828301@gmail.com>
> ---
>  include/linux/huge_mm.h | 17 +++++++++++++++++
>  mm/huge_memory.c        |  5 +----
>  mm/khugepaged.c         | 12 ++++++------
>  3 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 648cb3ce7099..a8f61db47f2a 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
>
>  extern unsigned long transparent_hugepage_flags;
>
> +/*
> + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> + */
> +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> +{
> +       if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> +           (vma->vm_end & HPAGE_PMD_MASK))
> +               return true;
> +
> +       return false;
> +}

First time coming across round_up() - thanks for that - but for
symmetry, maybe also use round_down() for the end? No strong opinion -
just a suggestion given I've just discovered it.

>  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
>                 unsigned long addr)
>  {
> @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
>         return false;
>  }
>
> +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> +{
> +       return false;
> +}
> +
>  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
>                 unsigned long addr)
>  {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 48182c8fe151..36ada544e494 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
>
>  bool transparent_hugepage_active(struct vm_area_struct *vma)
>  {
> -       /* The addr is used to check if the vma size fits */
> -       unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> -
> -       if (!transhuge_vma_suitable(vma, addr))
> +       if (!transhuge_vma_size_ok(vma))
>                 return false;
>         if (vma_is_anonymous(vma))
>                 return __transparent_hugepage_enabled(vma);

Do we need a check for vma->vm_pgoff alignment here, after
!vma_is_anonymous(), and now that we don't call
transhuge_vma_suitable()?

> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 84b9cf4b9be9..d0f8020164fc 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
>                                 vma->vm_pgoff, HPAGE_PMD_NR))
>                 return false;
>
> +       if (!transhuge_vma_size_ok(vma))
> +               return false;
> +
>         /* Enabled via shmem mount options or sysfs settings. */
>         if (shmem_file(vma->vm_file))
>                 return shmem_huge_enabled(vma);
> @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
>                           unsigned long vm_flags)
>  {
>         if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> -           khugepaged_enabled() &&
> -           (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> -            (vma->vm_end & HPAGE_PMD_MASK))) {
> +           khugepaged_enabled()) {
>                 if (hugepage_vma_check(vma, vm_flags))
>                         __khugepaged_enter(vma->vm_mm);
>         }
> @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>                         progress++;
>                         continue;
>                 }
> -               hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> +
> +               hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
>                 hend = vma->vm_end & HPAGE_PMD_MASK;
> -               if (hstart >= hend)
> -                       goto skip;
>                 if (khugepaged_scan.address > hend)
>                         goto skip;
>                 if (khugepaged_scan.address < hstart)

Likewise, could do round_down() here (just a suggestion)

> --
> 2.26.3
>
>
Yang Shi June 10, 2022, 12:08 a.m. UTC | #2
On Thu, Jun 9, 2022 at 3:21 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@gmail.com> wrote:
> >
> > There are couple of places that check whether the vma size is ok for
> > THP or not, they are open coded and duplicate, introduce
> > transhuge_vma_size_ok() helper to do the job.
> >
> > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > ---
> >  include/linux/huge_mm.h | 17 +++++++++++++++++
> >  mm/huge_memory.c        |  5 +----
> >  mm/khugepaged.c         | 12 ++++++------
> >  3 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index 648cb3ce7099..a8f61db47f2a 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
> >
> >  extern unsigned long transparent_hugepage_flags;
> >
> > +/*
> > + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> > + */
> > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > +{
> > +       if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> > +           (vma->vm_end & HPAGE_PMD_MASK))
> > +               return true;
> > +
> > +       return false;
> > +}
>
> First time coming across round_up() - thanks for that - but for
> symmetry, maybe also use round_down() for the end? No strong opinion -
> just a suggestion given I've just discovered it.

Yeah, round_down is fine too.

>
> >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> >                 unsigned long addr)
> >  {
> > @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
> >         return false;
> >  }
> >
> > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > +{
> > +       return false;
> > +}
> > +
> >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> >                 unsigned long addr)
> >  {
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 48182c8fe151..36ada544e494 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
> >
> >  bool transparent_hugepage_active(struct vm_area_struct *vma)
> >  {
> > -       /* The addr is used to check if the vma size fits */
> > -       unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> > -
> > -       if (!transhuge_vma_suitable(vma, addr))
> > +       if (!transhuge_vma_size_ok(vma))
> >                 return false;
> >         if (vma_is_anonymous(vma))
> >                 return __transparent_hugepage_enabled(vma);
>
> Do we need a check for vma->vm_pgoff alignment here, after
> !vma_is_anonymous(), and now that we don't call
> transhuge_vma_suitable()?

Actually I was thinking about this too. But the THPeligible bit shown
by smaps is a little bit ambiguous for file vma. The document says:
"THPeligible" indicates whether the mapping is eligible for allocating
THP pages - 1 if true, 0 otherwise.

Even though it doesn't fulfill the alignment, it is still possible to
get THP allocated, but just can't be PMD mapped. So the old behavior
of THPeligible for file vma seems problematic, or at least doesn't
match the document.

I should elaborate this in the commit log.

>
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 84b9cf4b9be9..d0f8020164fc 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> >                                 vma->vm_pgoff, HPAGE_PMD_NR))
> >                 return false;
> >
> > +       if (!transhuge_vma_size_ok(vma))
> > +               return false;
> > +
> >         /* Enabled via shmem mount options or sysfs settings. */
> >         if (shmem_file(vma->vm_file))
> >                 return shmem_huge_enabled(vma);
> > @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> >                           unsigned long vm_flags)
> >  {
> >         if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> > -           khugepaged_enabled() &&
> > -           (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> > -            (vma->vm_end & HPAGE_PMD_MASK))) {
> > +           khugepaged_enabled()) {
> >                 if (hugepage_vma_check(vma, vm_flags))
> >                         __khugepaged_enter(vma->vm_mm);
> >         }
> > @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> >                         progress++;
> >                         continue;
> >                 }
> > -               hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> > +
> > +               hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> >                 hend = vma->vm_end & HPAGE_PMD_MASK;
> > -               if (hstart >= hend)
> > -                       goto skip;
> >                 if (khugepaged_scan.address > hend)
> >                         goto skip;
> >                 if (khugepaged_scan.address < hstart)
>
> Likewise, could do round_down() here (just a suggestion)

Fine to me.

>
> > --
> > 2.26.3
> >
> >
Zach O'Keefe June 10, 2022, 12:51 a.m. UTC | #3
On Thu, Jun 9, 2022 at 5:08 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Thu, Jun 9, 2022 at 3:21 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@gmail.com> wrote:
> > >
> > > There are couple of places that check whether the vma size is ok for
> > > THP or not, they are open coded and duplicate, introduce
> > > transhuge_vma_size_ok() helper to do the job.
> > >
> > > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > > ---
> > >  include/linux/huge_mm.h | 17 +++++++++++++++++
> > >  mm/huge_memory.c        |  5 +----
> > >  mm/khugepaged.c         | 12 ++++++------
> > >  3 files changed, 24 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > > index 648cb3ce7099..a8f61db47f2a 100644
> > > --- a/include/linux/huge_mm.h
> > > +++ b/include/linux/huge_mm.h
> > > @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
> > >
> > >  extern unsigned long transparent_hugepage_flags;
> > >
> > > +/*
> > > + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> > > + */
> > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > +{
> > > +       if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> > > +           (vma->vm_end & HPAGE_PMD_MASK))
> > > +               return true;
> > > +
> > > +       return false;
> > > +}
> >
> > First time coming across round_up() - thanks for that - but for
> > symmetry, maybe also use round_down() for the end? No strong opinion -
> > just a suggestion given I've just discovered it.
>
> Yeah, round_down is fine too.
>
> >
> > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > >                 unsigned long addr)
> > >  {
> > > @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
> > >         return false;
> > >  }
> > >
> > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > +{
> > > +       return false;
> > > +}
> > > +
> > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > >                 unsigned long addr)
> > >  {
> > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > index 48182c8fe151..36ada544e494 100644
> > > --- a/mm/huge_memory.c
> > > +++ b/mm/huge_memory.c
> > > @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
> > >
> > >  bool transparent_hugepage_active(struct vm_area_struct *vma)
> > >  {
> > > -       /* The addr is used to check if the vma size fits */
> > > -       unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> > > -
> > > -       if (!transhuge_vma_suitable(vma, addr))
> > > +       if (!transhuge_vma_size_ok(vma))
> > >                 return false;
> > >         if (vma_is_anonymous(vma))
> > >                 return __transparent_hugepage_enabled(vma);
> >
> > Do we need a check for vma->vm_pgoff alignment here, after
> > !vma_is_anonymous(), and now that we don't call
> > transhuge_vma_suitable()?
>
> Actually I was thinking about this too. But the THPeligible bit shown
> by smaps is a little bit ambiguous for file vma. The document says:
> "THPeligible" indicates whether the mapping is eligible for allocating
> THP pages - 1 if true, 0 otherwise.
>
> Even though it doesn't fulfill the alignment, it is still possible to
> get THP allocated, but just can't be PMD mapped. So the old behavior
> of THPeligible for file vma seems problematic, or at least doesn't
> match the document.

I think the term "THP" is used ambiguously. Often, but not always, in
the code, folks will go out of their way to specify "hugepage-sized"
page vs "pmd-mapped hugepage" - but at least from my experience,
external documentation doesn't. Given that THP as a concept doesn't
make much sense without the possibility of pmd-mapping, I think
"THPeligible" here means "pmd mappable". For example, AnonHugePages in
smaps means pmd-mapped anon hugepages.

That all said - the following patches will delete
transparent_hugepage_active() anyways.

> I should elaborate this in the commit log.
>
> >
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index 84b9cf4b9be9..d0f8020164fc 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> > >                                 vma->vm_pgoff, HPAGE_PMD_NR))
> > >                 return false;
> > >
> > > +       if (!transhuge_vma_size_ok(vma))
> > > +               return false;
> > > +
> > >         /* Enabled via shmem mount options or sysfs settings. */
> > >         if (shmem_file(vma->vm_file))
> > >                 return shmem_huge_enabled(vma);
> > > @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> > >                           unsigned long vm_flags)
> > >  {
> > >         if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> > > -           khugepaged_enabled() &&
> > > -           (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> > > -            (vma->vm_end & HPAGE_PMD_MASK))) {
> > > +           khugepaged_enabled()) {
> > >                 if (hugepage_vma_check(vma, vm_flags))
> > >                         __khugepaged_enter(vma->vm_mm);
> > >         }
> > > @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> > >                         progress++;
> > >                         continue;
> > >                 }
> > > -               hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> > > +
> > > +               hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> > >                 hend = vma->vm_end & HPAGE_PMD_MASK;
> > > -               if (hstart >= hend)
> > > -                       goto skip;
> > >                 if (khugepaged_scan.address > hend)
> > >                         goto skip;
> > >                 if (khugepaged_scan.address < hstart)
> >
> > Likewise, could do round_down() here (just a suggestion)
>
> Fine to me.
>
> >
> > > --
> > > 2.26.3
> > >
> > >
Miaohe Lin June 10, 2022, 7:20 a.m. UTC | #4
On 2022/6/7 5:44, Yang Shi wrote:
> There are couple of places that check whether the vma size is ok for
> THP or not, they are open coded and duplicate, introduce
> transhuge_vma_size_ok() helper to do the job.
> 
> Signed-off-by: Yang Shi <shy828301@gmail.com>
> ---
>  include/linux/huge_mm.h | 17 +++++++++++++++++
>  mm/huge_memory.c        |  5 +----
>  mm/khugepaged.c         | 12 ++++++------
>  3 files changed, 24 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 648cb3ce7099..a8f61db47f2a 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
>  
>  extern unsigned long transparent_hugepage_flags;
>  
> +/*
> + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> + */
> +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> +{
> +	if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> +	    (vma->vm_end & HPAGE_PMD_MASK))
> +		return true;
> +
> +	return false;
> +}
> +
>  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
>  		unsigned long addr)
>  {
> @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
>  	return false;
>  }
>  
> +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> +{
> +	return false;
> +}
> +
>  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
>  		unsigned long addr)
>  {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 48182c8fe151..36ada544e494 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
>  
>  bool transparent_hugepage_active(struct vm_area_struct *vma)
>  {
> -	/* The addr is used to check if the vma size fits */
> -	unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> -
> -	if (!transhuge_vma_suitable(vma, addr))

There is also a pgoff check for file pages in transhuge_vma_suitable. Is it
ignored deliberately?

> +	if (!transhuge_vma_size_ok(vma))
>  		return false;
>  	if (vma_is_anonymous(vma))
>  		return __transparent_hugepage_enabled(vma);
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 84b9cf4b9be9..d0f8020164fc 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
>  				vma->vm_pgoff, HPAGE_PMD_NR))
>  		return false;
>  
> +	if (!transhuge_vma_size_ok(vma))
> +		return false;
> +
>  	/* Enabled via shmem mount options or sysfs settings. */
>  	if (shmem_file(vma->vm_file))
>  		return shmem_huge_enabled(vma);
> @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
>  			  unsigned long vm_flags)
>  {
>  	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> -	    khugepaged_enabled() &&
> -	    (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> -	     (vma->vm_end & HPAGE_PMD_MASK))) {
> +	    khugepaged_enabled()) {
>  		if (hugepage_vma_check(vma, vm_flags))
>  			__khugepaged_enter(vma->vm_mm);
>  	}

After this change, khugepaged_enter_vma is identical to khugepaged_enter. Should one of
them be removed?

Thanks!

> @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>  			progress++;
>  			continue;
>  		}
> -		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> +
> +		hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
>  		hend = vma->vm_end & HPAGE_PMD_MASK;
> -		if (hstart >= hend)
> -			goto skip;
>  		if (khugepaged_scan.address > hend)
>  			goto skip;
>  		if (khugepaged_scan.address < hstart)
>
Yang Shi June 10, 2022, 4:38 p.m. UTC | #5
On Thu, Jun 9, 2022 at 5:52 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> On Thu, Jun 9, 2022 at 5:08 PM Yang Shi <shy828301@gmail.com> wrote:
> >
> > On Thu, Jun 9, 2022 at 3:21 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > >
> > > On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@gmail.com> wrote:
> > > >
> > > > There are couple of places that check whether the vma size is ok for
> > > > THP or not, they are open coded and duplicate, introduce
> > > > transhuge_vma_size_ok() helper to do the job.
> > > >
> > > > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > > > ---
> > > >  include/linux/huge_mm.h | 17 +++++++++++++++++
> > > >  mm/huge_memory.c        |  5 +----
> > > >  mm/khugepaged.c         | 12 ++++++------
> > > >  3 files changed, 24 insertions(+), 10 deletions(-)
> > > >
> > > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > > > index 648cb3ce7099..a8f61db47f2a 100644
> > > > --- a/include/linux/huge_mm.h
> > > > +++ b/include/linux/huge_mm.h
> > > > @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
> > > >
> > > >  extern unsigned long transparent_hugepage_flags;
> > > >
> > > > +/*
> > > > + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> > > > + */
> > > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > > +{
> > > > +       if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> > > > +           (vma->vm_end & HPAGE_PMD_MASK))
> > > > +               return true;
> > > > +
> > > > +       return false;
> > > > +}
> > >
> > > First time coming across round_up() - thanks for that - but for
> > > symmetry, maybe also use round_down() for the end? No strong opinion -
> > > just a suggestion given I've just discovered it.
> >
> > Yeah, round_down is fine too.
> >
> > >
> > > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > > >                 unsigned long addr)
> > > >  {
> > > > @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
> > > >         return false;
> > > >  }
> > > >
> > > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > > +{
> > > > +       return false;
> > > > +}
> > > > +
> > > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > > >                 unsigned long addr)
> > > >  {
> > > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > > index 48182c8fe151..36ada544e494 100644
> > > > --- a/mm/huge_memory.c
> > > > +++ b/mm/huge_memory.c
> > > > @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
> > > >
> > > >  bool transparent_hugepage_active(struct vm_area_struct *vma)
> > > >  {
> > > > -       /* The addr is used to check if the vma size fits */
> > > > -       unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> > > > -
> > > > -       if (!transhuge_vma_suitable(vma, addr))
> > > > +       if (!transhuge_vma_size_ok(vma))
> > > >                 return false;
> > > >         if (vma_is_anonymous(vma))
> > > >                 return __transparent_hugepage_enabled(vma);
> > >
> > > Do we need a check for vma->vm_pgoff alignment here, after
> > > !vma_is_anonymous(), and now that we don't call
> > > transhuge_vma_suitable()?
> >
> > Actually I was thinking about this too. But the THPeligible bit shown
> > by smaps is a little bit ambiguous for file vma. The document says:
> > "THPeligible" indicates whether the mapping is eligible for allocating
> > THP pages - 1 if true, 0 otherwise.
> >
> > Even though it doesn't fulfill the alignment, it is still possible to
> > get THP allocated, but just can't be PMD mapped. So the old behavior
> > of THPeligible for file vma seems problematic, or at least doesn't
> > match the document.
>
> I think the term "THP" is used ambiguously. Often, but not always, in
> the code, folks will go out of their way to specify "hugepage-sized"
> page vs "pmd-mapped hugepage" - but at least from my experience,
> external documentation doesn't. Given that THP as a concept doesn't
> make much sense without the possibility of pmd-mapping, I think
> "THPeligible here means "pmd mappable". For example, AnonHugePages in
> smaps means  pmd-mapped anon hugepages.

Yeah, depends on the expectation.

>
> That all said - the following patches will delete
> transparent_hugepage_active() anyways.

Yes, how I could forget this :-( The following removal of
transparent_hugepage_active() will restore the old behavior.

>
> > I should elaborate this in the commit log.
> >
> > >
> > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > > index 84b9cf4b9be9..d0f8020164fc 100644
> > > > --- a/mm/khugepaged.c
> > > > +++ b/mm/khugepaged.c
> > > > @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> > > >                                 vma->vm_pgoff, HPAGE_PMD_NR))
> > > >                 return false;
> > > >
> > > > +       if (!transhuge_vma_size_ok(vma))
> > > > +               return false;
> > > > +
> > > >         /* Enabled via shmem mount options or sysfs settings. */
> > > >         if (shmem_file(vma->vm_file))
> > > >                 return shmem_huge_enabled(vma);
> > > > @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> > > >                           unsigned long vm_flags)
> > > >  {
> > > >         if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> > > > -           khugepaged_enabled() &&
> > > > -           (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> > > > -            (vma->vm_end & HPAGE_PMD_MASK))) {
> > > > +           khugepaged_enabled()) {
> > > >                 if (hugepage_vma_check(vma, vm_flags))
> > > >                         __khugepaged_enter(vma->vm_mm);
> > > >         }
> > > > @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> > > >                         progress++;
> > > >                         continue;
> > > >                 }
> > > > -               hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> > > > +
> > > > +               hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> > > >                 hend = vma->vm_end & HPAGE_PMD_MASK;
> > > > -               if (hstart >= hend)
> > > > -                       goto skip;
> > > >                 if (khugepaged_scan.address > hend)
> > > >                         goto skip;
> > > >                 if (khugepaged_scan.address < hstart)
> > >
> > > Likewise, could do round_down() here (just a suggestion)
> >
> > Fine to me.
> >
> > >
> > > > --
> > > > 2.26.3
> > > >
> > > >
Yang Shi June 10, 2022, 4:47 p.m. UTC | #6
On Fri, Jun 10, 2022 at 12:20 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2022/6/7 5:44, Yang Shi wrote:
> > There are couple of places that check whether the vma size is ok for
> > THP or not, they are open coded and duplicate, introduce
> > transhuge_vma_size_ok() helper to do the job.
> >
> > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > ---
> >  include/linux/huge_mm.h | 17 +++++++++++++++++
> >  mm/huge_memory.c        |  5 +----
> >  mm/khugepaged.c         | 12 ++++++------
> >  3 files changed, 24 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index 648cb3ce7099..a8f61db47f2a 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
> >
> >  extern unsigned long transparent_hugepage_flags;
> >
> > +/*
> > + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> > + */
> > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > +{
> > +     if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> > +         (vma->vm_end & HPAGE_PMD_MASK))
> > +             return true;
> > +
> > +     return false;
> > +}
> > +
> >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> >               unsigned long addr)
> >  {
> > @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
> >       return false;
> >  }
> >
> > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > +{
> > +     return false;
> > +}
> > +
> >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> >               unsigned long addr)
> >  {
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 48182c8fe151..36ada544e494 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
> >
> >  bool transparent_hugepage_active(struct vm_area_struct *vma)
> >  {
> > -     /* The addr is used to check if the vma size fits */
> > -     unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> > -
> > -     if (!transhuge_vma_suitable(vma, addr))
>
> There is also pgoff check for file page in transhuge_vma_suitable. Is it ignored
> deliberately?

This has been discussed in the previous threads. The following removal
of transparent_hugepage_active() will restore the behavior.

>
> > +     if (!transhuge_vma_size_ok(vma))
> >               return false;
> >       if (vma_is_anonymous(vma))
> >               return __transparent_hugepage_enabled(vma);
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 84b9cf4b9be9..d0f8020164fc 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> >                               vma->vm_pgoff, HPAGE_PMD_NR))
> >               return false;
> >
> > +     if (!transhuge_vma_size_ok(vma))
> > +             return false;
> > +
> >       /* Enabled via shmem mount options or sysfs settings. */
> >       if (shmem_file(vma->vm_file))
> >               return shmem_huge_enabled(vma);
> > @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> >                         unsigned long vm_flags)
> >  {
> >       if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> > -         khugepaged_enabled() &&
> > -         (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> > -          (vma->vm_end & HPAGE_PMD_MASK))) {
> > +         khugepaged_enabled()) {
> >               if (hugepage_vma_check(vma, vm_flags))
> >                       __khugepaged_enter(vma->vm_mm);
> >       }
>
> After this change, khugepaged_enter_vma is identical to khugepaged_enter. Should one of
> them be removed?

Thanks for catching this. Although the later patch will make them
slightly different (khugepaged_enter() won't check the hugepage flag
anymore), the only user of khugepaged_enter() is the page fault path,
and it seems not worth keeping both. Will remove khugepaged_enter() in
the next version.

>
> Thanks!
>
> > @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> >                       progress++;
> >                       continue;
> >               }
> > -             hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> > +
> > +             hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> >               hend = vma->vm_end & HPAGE_PMD_MASK;
> > -             if (hstart >= hend)
> > -                     goto skip;
> >               if (khugepaged_scan.address > hend)
> >                       goto skip;
> >               if (khugepaged_scan.address < hstart)
> >
>
Yang Shi June 10, 2022, 9:24 p.m. UTC | #7
On Fri, Jun 10, 2022 at 9:38 AM Yang Shi <shy828301@gmail.com> wrote:
>
> On Thu, Jun 9, 2022 at 5:52 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > On Thu, Jun 9, 2022 at 5:08 PM Yang Shi <shy828301@gmail.com> wrote:
> > >
> > > On Thu, Jun 9, 2022 at 3:21 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > > >
> > > > On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@gmail.com> wrote:
> > > > >
> > > > > There are couple of places that check whether the vma size is ok for
> > > > > THP or not, they are open coded and duplicate, introduce
> > > > > transhuge_vma_size_ok() helper to do the job.
> > > > >
> > > > > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > > > > ---
> > > > >  include/linux/huge_mm.h | 17 +++++++++++++++++
> > > > >  mm/huge_memory.c        |  5 +----
> > > > >  mm/khugepaged.c         | 12 ++++++------
> > > > >  3 files changed, 24 insertions(+), 10 deletions(-)
> > > > >
> > > > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > > > > index 648cb3ce7099..a8f61db47f2a 100644
> > > > > --- a/include/linux/huge_mm.h
> > > > > +++ b/include/linux/huge_mm.h
> > > > > @@ -116,6 +116,18 @@ extern struct kobj_attribute shmem_enabled_attr;
> > > > >
> > > > >  extern unsigned long transparent_hugepage_flags;
> > > > >
> > > > > +/*
> > > > > + * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
> > > > > + */
> > > > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > > > +{
> > > > > +       if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
> > > > > +           (vma->vm_end & HPAGE_PMD_MASK))
> > > > > +               return true;
> > > > > +
> > > > > +       return false;
> > > > > +}
> > > >
> > > > First time coming across round_up() - thanks for that - but for
> > > > symmetry, maybe also use round_down() for the end? No strong opinion -
> > > > just a suggestion given I've just discovered it.
> > >
> > > Yeah, round_down is fine too.
> > >
> > > >
> > > > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > > > >                 unsigned long addr)
> > > > >  {
> > > > > @@ -345,6 +357,11 @@ static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
> > > > >         return false;
> > > > >  }
> > > > >
> > > > > +static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> > > > > +{
> > > > > +       return false;
> > > > > +}
> > > > > +
> > > > >  static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> > > > >                 unsigned long addr)
> > > > >  {
> > > > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > > > index 48182c8fe151..36ada544e494 100644
> > > > > --- a/mm/huge_memory.c
> > > > > +++ b/mm/huge_memory.c
> > > > > @@ -71,10 +71,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
> > > > >
> > > > >  bool transparent_hugepage_active(struct vm_area_struct *vma)
> > > > >  {
> > > > > -       /* The addr is used to check if the vma size fits */
> > > > > -       unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
> > > > > -
> > > > > -       if (!transhuge_vma_suitable(vma, addr))
> > > > > +       if (!transhuge_vma_size_ok(vma))
> > > > >                 return false;
> > > > >         if (vma_is_anonymous(vma))
> > > > >                 return __transparent_hugepage_enabled(vma);
> > > >
> > > > Do we need a check for vma->vm_pgoff alignment here, after
> > > > !vma_is_anonymous(), and now that we don't call
> > > > transhuge_vma_suitable()?
> > >
> > > Actually I was thinking about this too. But the THPeligible bit shown
> > > by smaps is a little bit ambiguous for file vma. The document says:
> > > "THPeligible" indicates whether the mapping is eligible for allocating
> > > THP pages - 1 if true, 0 otherwise.
> > >
> > > Even though it doesn't fulfill the alignment, it is still possible to
> > > get THP allocated, but just can't be PMD mapped. So the old behavior
> > > of THPeligible for file vma seems problematic, or at least doesn't
> > > match the document.
> >
> > I think the term "THP" is used ambiguously. Often, but not always, in
> > the code, folks will go out of their way to specify "hugepage-sized"
> > page vs "pmd-mapped hugepage" - but at least from my experience,
> > external documentation doesn't. Given that THP as a concept doesn't
> > make much sense without the possibility of pmd-mapping, I think
> > "THPeligible here means "pmd mappable". For example, AnonHugePages in
> > smaps means  pmd-mapped anon hugepages.
>
> Yeah, depends on the expectation.

The funny thing is I was the last one who touched THPeligible. It
seems the document needs to be updated too to make "pmd mappable" more
explicit.

>
> >
> > That all said - the following patches will delete
> > transparent_hugepage_active() anyways.
>
> Yes, how I could forget this :-( The following removal of
> transparent_hugepage_active() will restore the old behavior.
>
> >
> > > I should elaborate this in the commit log.
> > >
> > > >
> > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > > > index 84b9cf4b9be9..d0f8020164fc 100644
> > > > > --- a/mm/khugepaged.c
> > > > > +++ b/mm/khugepaged.c
> > > > > @@ -454,6 +454,9 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> > > > >                                 vma->vm_pgoff, HPAGE_PMD_NR))
> > > > >                 return false;
> > > > >
> > > > > +       if (!transhuge_vma_size_ok(vma))
> > > > > +               return false;
> > > > > +
> > > > >         /* Enabled via shmem mount options or sysfs settings. */
> > > > >         if (shmem_file(vma->vm_file))
> > > > >                 return shmem_huge_enabled(vma);
> > > > > @@ -512,9 +515,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> > > > >                           unsigned long vm_flags)
> > > > >  {
> > > > >         if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> > > > > -           khugepaged_enabled() &&
> > > > > -           (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
> > > > > -            (vma->vm_end & HPAGE_PMD_MASK))) {
> > > > > +           khugepaged_enabled()) {
> > > > >                 if (hugepage_vma_check(vma, vm_flags))
> > > > >                         __khugepaged_enter(vma->vm_mm);
> > > > >         }
> > > > > @@ -2142,10 +2143,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> > > > >                         progress++;
> > > > >                         continue;
> > > > >                 }
> > > > > -               hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
> > > > > +
> > > > > +               hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
> > > > >                 hend = vma->vm_end & HPAGE_PMD_MASK;
> > > > > -               if (hstart >= hend)
> > > > > -                       goto skip;
> > > > >                 if (khugepaged_scan.address > hend)
> > > > >                         goto skip;
> > > > >                 if (khugepaged_scan.address < hstart)
> > > >
> > > > Likewise, could do round_down() here (just a suggestion)
> > >
> > > Fine to me.
> > >
> > > >
> > > > > --
> > > > > 2.26.3
> > > > >
> > > > >
diff mbox series

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 648cb3ce7099..a8f61db47f2a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -116,6 +116,18 @@  extern struct kobj_attribute shmem_enabled_attr;
 
 extern unsigned long transparent_hugepage_flags;
 
+/*
+ * The vma size has to be large enough to hold an aligned HPAGE_PMD_SIZE area.
+ */
+static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
+{
+	if (round_up(vma->vm_start, HPAGE_PMD_SIZE) <
+	    (vma->vm_end & HPAGE_PMD_MASK))
+		return true;
+
+	return false;
+}
+
 static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 		unsigned long addr)
 {
@@ -345,6 +357,11 @@  static inline bool transparent_hugepage_active(struct vm_area_struct *vma)
 	return false;
 }
 
+static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
+{
+	return false;
+}
+
 static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 		unsigned long addr)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 48182c8fe151..36ada544e494 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -71,10 +71,7 @@  unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
 bool transparent_hugepage_active(struct vm_area_struct *vma)
 {
-	/* The addr is used to check if the vma size fits */
-	unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
-
-	if (!transhuge_vma_suitable(vma, addr))
+	if (!transhuge_vma_size_ok(vma))
 		return false;
 	if (vma_is_anonymous(vma))
 		return __transparent_hugepage_enabled(vma);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 84b9cf4b9be9..d0f8020164fc 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -454,6 +454,9 @@  bool hugepage_vma_check(struct vm_area_struct *vma,
 				vma->vm_pgoff, HPAGE_PMD_NR))
 		return false;
 
+	if (!transhuge_vma_size_ok(vma))
+		return false;
+
 	/* Enabled via shmem mount options or sysfs settings. */
 	if (shmem_file(vma->vm_file))
 		return shmem_huge_enabled(vma);
@@ -512,9 +515,7 @@  void khugepaged_enter_vma(struct vm_area_struct *vma,
 			  unsigned long vm_flags)
 {
 	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
-	    khugepaged_enabled() &&
-	    (((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
-	     (vma->vm_end & HPAGE_PMD_MASK))) {
+	    khugepaged_enabled()) {
 		if (hugepage_vma_check(vma, vm_flags))
 			__khugepaged_enter(vma->vm_mm);
 	}
@@ -2142,10 +2143,9 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			progress++;
 			continue;
 		}
-		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
+
+		hstart = round_up(vma->vm_start, HPAGE_PMD_SIZE);
 		hend = vma->vm_end & HPAGE_PMD_MASK;
-		if (hstart >= hend)
-			goto skip;
 		if (khugepaged_scan.address > hend)
 			goto skip;
 		if (khugepaged_scan.address < hstart)