diff mbox series

[v2,6/9] mm: Make hugetlb mappings go through mm_get_unmapped_area_vmflags

Message ID 20240729091018.2152-7-osalvador@suse.de (mailing list archive)
State New
Headers show
Series Unify hugetlb into arch_get_unmapped_area functions | expand

Commit Message

Oscar Salvador July 29, 2024, 9:10 a.m. UTC
Hugetlb mappings will no longer be special cased but rather go through
the generic mm_get_unmapped_area_vmflags function.
For that to happen, let us remove the .get_unmapped_area from
hugetlbfs_file_operations struct, and hint __get_unmapped_area
that it should not send hugetlb mappings through thp_get_unmapped_area_vmflags
but through mm_get_unmapped_area_vmflags.

Create also a function called hugetlb_mmap_check_and_align() where a
couple of safety checks are being done and the addr is aligned to
the huge page size.
Otherwise we will have to do this in every single function, which
duplicates quite a lot of code.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 fs/hugetlbfs/inode.c    | 22 ++++++++++++++--------
 include/linux/hugetlb.h |  8 +++-----
 mm/mmap.c               | 15 ++++++++++++++-
 3 files changed, 31 insertions(+), 14 deletions(-)

Comments

Lorenzo Stoakes July 31, 2024, 11:02 a.m. UTC | #1
On Mon, Jul 29, 2024 at 11:10:15AM GMT, Oscar Salvador wrote:
> Hugetlb mappings will no longer be special cased but rather go through
> the generic mm_get_unmapped_area_vmflags function.
> For that to happen, let us remove the .get_unmapped_area from
> hugetlbfs_file_operations struct, and hint __get_unmapped_area
> that it should not send hugetlb mappings through thp_get_unmapped_area_vmflags
> but through mm_get_unmapped_area_vmflags.
>
> Create also a function called hugetlb_mmap_check_and_align() where a
> couple of safety checks are being done and the addr is aligned to
> the huge page size.
> Otherwise we will have to do this in every single function, which
> duplicates quite a lot of code.
>
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> ---
>  fs/hugetlbfs/inode.c    | 22 ++++++++++++++--------
>  include/linux/hugetlb.h |  8 +++-----
>  mm/mmap.c               | 15 ++++++++++++++-
>  3 files changed, 31 insertions(+), 14 deletions(-)
>
> diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
> index 9f6cff356796..5d47a2785a5d 100644
> --- a/fs/hugetlbfs/inode.c
> +++ b/fs/hugetlbfs/inode.c
> @@ -258,15 +258,22 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>  			pgoff, flags);
>  }
>
> -#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> -static unsigned long
> -hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> -			  unsigned long len, unsigned long pgoff,
> -			  unsigned long flags)
> +unsigned long
> +hugetlb_mmap_check_and_align(struct file *file, unsigned long addr,
> +			     unsigned long len, unsigned long flags)
>  {
> -	return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
> +	unsigned long addr0 = 0;
> +	struct hstate *h = hstate_file(file);
> +
> +	if (len & ~huge_page_mask(h))
> +		return -EINVAL;
> +	if ((flags & MAP_FIXED) && prepare_hugepage_range(file, addr, len))
> +		return -EINVAL;
> +	if (addr)
> +		addr0 = ALIGN(addr, huge_page_size(h));
> +
> +	return addr0;
>  }
> -#endif
>
>  /*
>   * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
> @@ -1300,7 +1307,6 @@ static const struct file_operations hugetlbfs_file_operations = {
>  	.read_iter		= hugetlbfs_read_iter,
>  	.mmap			= hugetlbfs_file_mmap,
>  	.fsync			= noop_fsync,
> -	.get_unmapped_area	= hugetlb_get_unmapped_area,

This is causing a NULL pointer deref error in the mm self-tests,
specifically hugepage-shm.

This is because in __get_unmapped_area(), you check to see if the file has
an f_ops->get_unampped_area() however ('wonderfully'...) the shm stuff
wraps it, so this will be shm_get_unmapped_area() which then accesses the
underlying hugetlb file and _unconditionally_ calls
f_op->get_unmapped_area(), which you just made NULL and... kaboom :)

You can't even add null check in to this wrapper as at this point
everything assumes that you _can_ get an unmapped area. So yeah, it's kinda
broken.

This makes me think the whole thing is super-delicate and you probably need
to rethink this approach carefully, or least _very carefully_ audit users
of this operation.

>  	.llseek			= default_llseek,
>  	.fallocate		= hugetlbfs_fallocate,
>  	.fop_flags		= FOP_HUGE_PAGES,
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 0ec14e5e0890..1413cdcfdb1a 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -549,11 +549,9 @@ static inline struct hstate *hstate_inode(struct inode *i)
>  }
>  #endif /* !CONFIG_HUGETLBFS */
>
> -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
> -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> -					unsigned long len, unsigned long pgoff,
> -					unsigned long flags);
> -#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

By doing this you are causing an compilation error (at least on my compiler
with an x86-64 defconfig-based build):

arch/x86/mm/hugetlbpage.c:84:1: error: no previous prototype for
‘hugetlb_get_unmapped_area’ [-Werror=missing-prototypes]
   84 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
      | ^~~~~~~~~~~~~~~~~~~~~~~~~

So you need to rethink how you are doing this. I am not sure why we'd leave
these arch-specific bits around unless you're calling into them somehow
elsewhere? But this error suggests you aren't.

> +unsigned long
> +hugetlb_mmap_check_and_align(struct file *file, unsigned long addr,
> +			     unsigned long len, unsigned long flags);
>
>  unsigned long
>  generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 7b623811d82a..f755d8a298c5 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -849,6 +849,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
>  				  unsigned long, unsigned long, unsigned long)
>  				  = NULL;
>
> +	bool is_hugetlb = false;
>  	unsigned long error = arch_mmap_check(addr, len, flags);
>  	if (error)
>  		return error;
> @@ -857,6 +858,9 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
>  	if (len > TASK_SIZE)
>  		return -ENOMEM;
>
> +	if (file && is_file_hugepages(file))
> +		is_hugetlb = true;
> +
>  	if (file) {
>  		if (file->f_op->get_unmapped_area)
>  			get_area = file->f_op->get_unmapped_area;
> @@ -874,11 +878,20 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
>
>  	if (get_area) {
>  		addr = get_area(file, addr, len, pgoff, flags);
> -	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
> +	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && !is_hugetlb) {
>  		/* Ensures that larger anonymous mappings are THP aligned. */
>  		addr = thp_get_unmapped_area_vmflags(file, addr, len,
>  						     pgoff, flags, vm_flags);
>  	} else {
> +		/*
> +		 * Consolidate hugepages checks in one place, and also align addr
> +		 * to hugepage size.
> +		 */
> +		if (is_hugetlb) {
> +			addr = hugetlb_mmap_check_and_align(file, addr, len, flags);
> +			if (IS_ERR_VALUE(addr))
> +				return addr;
> +		}
>  		addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len,
>  						    pgoff, flags, vm_flags);
>  	}
> --
> 2.45.2
>

Andrew - could we drop this from mm-unstable until Oscar can respin? As
it's causing the mm self tests to kernel panic and I'm using them a lot in
my testing of a new series thanks :>)
Oscar Salvador July 31, 2024, 3:08 p.m. UTC | #2
On Wed, Jul 31, 2024 at 12:02:47PM +0100, Lorenzo Stoakes wrote:
> On Mon, Jul 29, 2024 at 11:10:15AM GMT, Oscar Salvador wrote:
> >   * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
> > @@ -1300,7 +1307,6 @@ static const struct file_operations hugetlbfs_file_operations = {
> >  	.read_iter		= hugetlbfs_read_iter,
> >  	.mmap			= hugetlbfs_file_mmap,
> >  	.fsync			= noop_fsync,
> > -	.get_unmapped_area	= hugetlb_get_unmapped_area,
> 
> This is causing a NULL pointer deref error in the mm self-tests,
> specifically hugepage-shm.
> 
> This is because in __get_unmapped_area(), you check to see if the file has
> an f_ops->get_unampped_area() however ('wonderfully'...) the shm stuff
> wraps it, so this will be shm_get_unmapped_area() which then accesses the
> underlying hugetlb file and _unconditionally_ calls
> f_op->get_unmapped_area(), which you just made NULL and... kaboom :)
> 
> You can't even add null check in to this wrapper as at this point
> everything assumes that you _can_ get an unmapped area. So yeah, it's kinda
> broken.
> 
> This makes me think the whole thing is super-delicate and you probably need
> to rethink this approach carefully, or least _very carefully_ audit users
> of this operation.

Thanks for reporting this Lorenzo, highly appreciated.

I will check, but..

> By doing this you are causing an compilation error (at least on my compiler
> with an x86-64 defconfig-based build):
> 
> arch/x86/mm/hugetlbpage.c:84:1: error: no previous prototype for
> ‘hugetlb_get_unmapped_area’ [-Werror=missing-prototypes]
>    84 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>       | ^~~~~~~~~~~~~~~~~~~~~~~~~

Something is off here.

git grep hugetlb_get_unmapped_area

returns nothing.
After this, arch/x86/mm/hugetlbpage.c should only contain:

  #ifdef CONFIG_X86_64
  bool __init arch_hugetlb_valid_size(unsigned long size)
  {
          if (size == PMD_SIZE)
                  return true;
          else if (size == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES))
                  return true;
          else
                  return false;
  }
  
  #ifdef CONFIG_CONTIG_ALLOC
  static __init int gigantic_pages_init(void)
  {
          /* With compaction or CMA we can allocate gigantic pages at runtime */
          if (boot_cpu_has(X86_FEATURE_GBPAGES))
                  hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
          return 0;
  }
  arch_initcall(gigantic_pages_init);
  #endif
  #endif

so what is going here?
Maybe the series was not properly applied to mm-unstable?

I will have a look.
Oscar Salvador July 31, 2024, 3:11 p.m. UTC | #3
On Wed, Jul 31, 2024 at 05:08:24PM +0200, Oscar Salvador wrote:
> On Wed, Jul 31, 2024 at 12:02:47PM +0100, Lorenzo Stoakes wrote:
> > On Mon, Jul 29, 2024 at 11:10:15AM GMT, Oscar Salvador wrote:
> > >   * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
> > > @@ -1300,7 +1307,6 @@ static const struct file_operations hugetlbfs_file_operations = {
> > >  	.read_iter		= hugetlbfs_read_iter,
> > >  	.mmap			= hugetlbfs_file_mmap,
> > >  	.fsync			= noop_fsync,
> > > -	.get_unmapped_area	= hugetlb_get_unmapped_area,
> > 
> > This is causing a NULL pointer deref error in the mm self-tests,
> > specifically hugepage-shm.
> > 
> > This is because in __get_unmapped_area(), you check to see if the file has
> > an f_ops->get_unampped_area() however ('wonderfully'...) the shm stuff
> > wraps it, so this will be shm_get_unmapped_area() which then accesses the
> > underlying hugetlb file and _unconditionally_ calls
> > f_op->get_unmapped_area(), which you just made NULL and... kaboom :)
> > 
> > You can't even add null check in to this wrapper as at this point
> > everything assumes that you _can_ get an unmapped area. So yeah, it's kinda
> > broken.
> > 
> > This makes me think the whole thing is super-delicate and you probably need
> > to rethink this approach carefully, or least _very carefully_ audit users
> > of this operation.
> 
> Thanks for reporting this Lorenzo, highly appreciated.
> 
> I will check, but..
> 
> > By doing this you are causing an compilation error (at least on my compiler
> > with an x86-64 defconfig-based build):
> > 
> > arch/x86/mm/hugetlbpage.c:84:1: error: no previous prototype for
> > ‘hugetlb_get_unmapped_area’ [-Werror=missing-prototypes]
> >    84 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> >       | ^~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Something is off here.
> 
> git grep hugetlb_get_unmapped_area

Heh, of course I saw what is wrong after pressing intro.
Ok, with the entire series applied you should not see this problem as
hugetlb_get_unmapped_area gets totally wiped out, but checking out only
this commit indeed throws an error.

I will see how I can reshufle this.

thanks!
Lorenzo Stoakes July 31, 2024, 3:19 p.m. UTC | #4
On Wed, Jul 31, 2024 at 05:08:24PM GMT, Oscar Salvador wrote:
> On Wed, Jul 31, 2024 at 12:02:47PM +0100, Lorenzo Stoakes wrote:
> > On Mon, Jul 29, 2024 at 11:10:15AM GMT, Oscar Salvador wrote:
> > >   * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
> > > @@ -1300,7 +1307,6 @@ static const struct file_operations hugetlbfs_file_operations = {
> > >  	.read_iter		= hugetlbfs_read_iter,
> > >  	.mmap			= hugetlbfs_file_mmap,
> > >  	.fsync			= noop_fsync,
> > > -	.get_unmapped_area	= hugetlb_get_unmapped_area,
> >
> > This is causing a NULL pointer deref error in the mm self-tests,
> > specifically hugepage-shm.
> >
> > This is because in __get_unmapped_area(), you check to see if the file has
> > an f_ops->get_unampped_area() however ('wonderfully'...) the shm stuff
> > wraps it, so this will be shm_get_unmapped_area() which then accesses the
> > underlying hugetlb file and _unconditionally_ calls
> > f_op->get_unmapped_area(), which you just made NULL and... kaboom :)
> >
> > You can't even add null check in to this wrapper as at this point
> > everything assumes that you _can_ get an unmapped area. So yeah, it's kinda
> > broken.
> >
> > This makes me think the whole thing is super-delicate and you probably need
> > to rethink this approach carefully, or least _very carefully_ audit users
> > of this operation.
>
> Thanks for reporting this Lorenzo, highly appreciated.
>

No problem :)

> I will check, but..

Yeah this is obviously the priority.

>
> > By doing this you are causing an compilation error (at least on my compiler
> > with an x86-64 defconfig-based build):
> >
> > arch/x86/mm/hugetlbpage.c:84:1: error: no previous prototype for
> > ‘hugetlb_get_unmapped_area’ [-Werror=missing-prototypes]
> >    84 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
> >       | ^~~~~~~~~~~~~~~~~~~~~~~~~
>
> Something is off here.
>
> git grep hugetlb_get_unmapped_area
>
> returns nothing.

Yeah this is at commit aee8efc95fc2 ("mm: make hugetlb mappings go through
mm_get_unmapped_area_vmflags").

If you:

git checkout aee8efc95fc2
git grep hugetlb_get_unmapped_area

You'll see it.

I'm guessing you remove this in future commits, but the kernel must be able
to build at every revision so we can bisect (I found this issue through a
bisect and had to fix this up to check).

A trivial fix is just to provide the prototype immediately prior to the
function decl, however the more correct solution is probably to do the
removals at the same time.

> After this, arch/x86/mm/hugetlbpage.c should only contain:
>
>   #ifdef CONFIG_X86_64
>   bool __init arch_hugetlb_valid_size(unsigned long size)
>   {
>           if (size == PMD_SIZE)
>                   return true;
>           else if (size == PUD_SIZE && boot_cpu_has(X86_FEATURE_GBPAGES))
>                   return true;
>           else
>                   return false;
>   }
>
>   #ifdef CONFIG_CONTIG_ALLOC
>   static __init int gigantic_pages_init(void)
>   {
>           /* With compaction or CMA we can allocate gigantic pages at runtime */
>           if (boot_cpu_has(X86_FEATURE_GBPAGES))
>                   hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
>           return 0;
>   }
>   arch_initcall(gigantic_pages_init);
>   #endif
>   #endif
>
> so what is going here?
> Maybe the series was not properly applied to mm-unstable?
>
> I will have a look.

I see this being removed in commit 631dc86d2f95 ("mm: drop
hugetlb_get_unmapped_area{_*} functions") which comes after aee8efc95fc2,
so basically I think you should squash the two... or at least find a way to
adjust them so this error doesn't arise.

This bit is just a bit of a slightly nitty cleanup to make sure things
build at every commit, the first issue is the really key one, just needs
some tweaking to deal with the frankly bloody horrible SHM stuff... Do not
blame you for missing that one!

Cheers :)

>
>
> --
> Oscar Salvador
> SUSE Labs
Oscar Salvador July 31, 2024, 4:04 p.m. UTC | #5
On Wed, Jul 31, 2024 at 04:19:09PM +0100, Lorenzo Stoakes wrote:
> Yeah this is at commit aee8efc95fc2 ("mm: make hugetlb mappings go through
> mm_get_unmapped_area_vmflags").
> 
> If you:
> 
> git checkout aee8efc95fc2
> git grep hugetlb_get_unmapped_area
> 
> You'll see it.
> 
> I'm guessing you remove this in future commits, but the kernel must be able
> to build at every revision so we can bisect (I found this issue through a
> bisect and had to fix this up to check).
> 
> A trivial fix is just to provide the prototype immediately prior to the
> function decl, however the more correct solution is probably to do the
> removals at the same time.

Yeah, I just squashed the removal commit and this one.

> This bit is just a bit of a slightly nitty cleanup to make sure things
> build at every commit, the first issue is the really key one, just needs
> some tweaking to deal with the frankly bloody horrible SHM stuff... Do not
> blame you for missing that one!

I did not check closely yet, but are blowing up in:

 if (shmem_huge != SHMEM_HUGE_FORCE) {
         ... 
	 if (file) {
		 VM_BUG_ON(file->f_op != &shmem_file_operations)

 ?
Lorenzo Stoakes July 31, 2024, 4:15 p.m. UTC | #6
On Wed, Jul 31, 2024 at 06:04:04PM GMT, Oscar Salvador wrote:
> On Wed, Jul 31, 2024 at 04:19:09PM +0100, Lorenzo Stoakes wrote:
> > Yeah this is at commit aee8efc95fc2 ("mm: make hugetlb mappings go through
> > mm_get_unmapped_area_vmflags").
> >
> > If you:
> >
> > git checkout aee8efc95fc2
> > git grep hugetlb_get_unmapped_area
> >
> > You'll see it.
> >
> > I'm guessing you remove this in future commits, but the kernel must be able
> > to build at every revision so we can bisect (I found this issue through a
> > bisect and had to fix this up to check).
> >
> > A trivial fix is just to provide the prototype immediately prior to the
> > function decl, however the more correct solution is probably to do the
> > removals at the same time.
>
> Yeah, I just squashed the removal commit and this one.
>
> > This bit is just a bit of a slightly nitty cleanup to make sure things
> > build at every commit, the first issue is the really key one, just needs
> > some tweaking to deal with the frankly bloody horrible SHM stuff... Do not
> > blame you for missing that one!
>
> I did not check closely yet, but are blowing up in:
>
>  if (shmem_huge != SHMEM_HUGE_FORCE) {
>          ...
> 	 if (file) {
> 		 VM_BUG_ON(file->f_op != &shmem_file_operations)
>
>  ?

I've not got the vm debug on in my build, so it's blowing up here for me:

static unsigned long shm_get_unmapped_area(struct file *file,
	unsigned long addr, unsigned long len, unsigned long pgoff,
	unsigned long flags)
{
	struct shm_file_data *sfd = shm_file_data(file);

	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
						pgoff, flags);
}

Notice that that doesn't check whether sfd->file->f_op->get_unmapped_area
is NULL.

So since you remove this from the f_ops, it causes a NULL pointer deref.

In __get_unmapped_area() you have:

	if (file) {
		if (file->f_op->get_unmapped_area)
			get_area = file->f_op->get_unmapped_area;

...

	if (get_area) {
		addr = get_area(file, addr, len, pgoff, flags);

Now since you are dealing with a shm file that has shm_file_operations

static const struct file_operations shm_file_operations = {
..
	.get_unmapped_area	= shm_get_unmapped_area,
...
};

Then this get_area() is invoked, which calls shm_get_unmapped_area(), which
calls f_op->get_unmapped_area() on your hugetlbfs_file_operations object
which you just deleted and it's NULL.

This is why you have to be super careful here, there's clearly stuff out
there that assumes that this can't happen, which you need to track down.

A quick grep however _suggests_ this might be the one landmine place. But
you need to find a smart way to deal with this.

>
>
> --
> Oscar Salvador
> SUSE Labs
Andrew Morton July 31, 2024, 8:03 p.m. UTC | #7
On Wed, 31 Jul 2024 17:11:34 +0200 Oscar Salvador <osalvador@suse.de> wrote:

> > git grep hugetlb_get_unmapped_area
> 
> Heh, of course I saw what is wrong after pressing intro.
> Ok, with the entire series applied you should not see this problem as
> hugetlb_get_unmapped_area gets totally wiped out, but checking out only
> this commit indeed throws an error.
> 
> I will see how I can reshufle this.

I dropped the v2 series, so reshuffle away.
Oscar Salvador Aug. 1, 2024, 8:14 a.m. UTC | #8
On Wed, Jul 31, 2024 at 05:15:41PM +0100, Lorenzo Stoakes wrote:
> I've not got the vm debug on in my build, so it's blowing up here for me:
> 
> static unsigned long shm_get_unmapped_area(struct file *file,
> 	unsigned long addr, unsigned long len, unsigned long pgoff,
> 	unsigned long flags)
> {
> 	struct shm_file_data *sfd = shm_file_data(file);
> 
> 	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
> 						pgoff, flags);
> }
> 
> Notice that that doesn't check whether sfd->file->f_op->get_unmapped_area
> is NULL.

I see now, thanks.

> So since you remove this from the f_ops, it causes a NULL pointer deref.
...
> static const struct file_operations shm_file_operations = {
> ..
> 	.get_unmapped_area	= shm_get_unmapped_area,
> ...
> };
> 
> Then this get_area() is invoked, which calls shm_get_unmapped_area(), which
> calls f_op->get_unmapped_area() on your hugetlbfs_file_operations object
> which you just deleted and it's NULL.
> 
> This is why you have to be super careful here, there's clearly stuff out
> there that assumes that this can't happen, which you need to track down.
> 
> A quick grep however _suggests_ this might be the one landmine place. But
> you need to find a smart way to deal with this.

Probably, the most straightforward way to fix this is to instead of
setting .get_unmapped_area to NULL for hugetlbfs_file_operations, would
be to have it re-defined like:

 .get_unmapped_area = mm_get_unmapped_area_vmflags

Which is what we call after this patchset.
So no more things have to tweaked.

On a more correct way, __maybe__ have something like:


 diff --git a/ipc/shm.c b/ipc/shm.c
 index 3e3071252dac..222dca8a3716 100644
 --- a/ipc/shm.c
 +++ b/ipc/shm.c
 @@ -648,8 +648,11 @@ static unsigned long shm_get_unmapped_area(struct file *file,
  {
  	struct shm_file_data *sfd = shm_file_data(file);
  
 -	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
 +	if (sfd->file->f_op->get_unmapped_area)
 +		return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
  						pgoff, flags);
 +
 +	return mm_get_unmapped_area_vmflags(sfd->file, addr, len, pgoff, flags);
  }
  
  static const struct file_operations shm_file_operations = {

 
Still unsure about which approach looks more correct though.
Lorenzo Stoakes Aug. 1, 2024, 10:11 a.m. UTC | #9
On Thu, Aug 01, 2024 at 10:14:26AM GMT, Oscar Salvador wrote:
> On Wed, Jul 31, 2024 at 05:15:41PM +0100, Lorenzo Stoakes wrote:
> > I've not got the vm debug on in my build, so it's blowing up here for me:
> >
> > static unsigned long shm_get_unmapped_area(struct file *file,
> > 	unsigned long addr, unsigned long len, unsigned long pgoff,
> > 	unsigned long flags)
> > {
> > 	struct shm_file_data *sfd = shm_file_data(file);
> >
> > 	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
> > 						pgoff, flags);
> > }
> >
> > Notice that that doesn't check whether sfd->file->f_op->get_unmapped_area
> > is NULL.
>
> I see now, thanks.
>
> > So since you remove this from the f_ops, it causes a NULL pointer deref.
> ...
> > static const struct file_operations shm_file_operations = {
> > ..
> > 	.get_unmapped_area	= shm_get_unmapped_area,
> > ...
> > };
> >
> > Then this get_area() is invoked, which calls shm_get_unmapped_area(), which
> > calls f_op->get_unmapped_area() on your hugetlbfs_file_operations object
> > which you just deleted and it's NULL.
> >
> > This is why you have to be super careful here, there's clearly stuff out
> > there that assumes that this can't happen, which you need to track down.
> >
> > A quick grep however _suggests_ this might be the one landmine place. But
> > you need to find a smart way to deal with this.
>
> Probably, the most straightforward way to fix this is to instead of
> setting .get_unmapped_area to NULL for hugetlbfs_file_operations, would
> be to have it re-defined like:
>
>  .get_unmapped_area = mm_get_unmapped_area_vmflags

I prefer this at a glance.

>
> Which is what we call after this patchset.
> So no more things have to tweaked.
>
> On a more correct way, __maybe__ have something like:
>
>
>  diff --git a/ipc/shm.c b/ipc/shm.c
>  index 3e3071252dac..222dca8a3716 100644
>  --- a/ipc/shm.c
>  +++ b/ipc/shm.c
>  @@ -648,8 +648,11 @@ static unsigned long shm_get_unmapped_area(struct file *file,
>   {
>   	struct shm_file_data *sfd = shm_file_data(file);
>
>  -	return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
>  +	if (sfd->file->f_op->get_unmapped_area)
>  +		return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
>   						pgoff, flags);
>  +
>  +	return mm_get_unmapped_area_vmflags(sfd->file, addr, len, pgoff, flags);
>   }
>
>   static const struct file_operations shm_file_operations = {
>

I hate this to be honest, it's another 'we just have to remember to call an
arbitrary function' situation (why here and not elsewhere?) and
perpetuating the horrible if (hugetlb) { ... } approach to things.

I mean the shm code is _hateful_ anyway, but yeah I really really don't
like this.

I'd quite like us to add a check here for that function being NULL though,
I was mistaken in my previous reply saying we can't do anything here,
actually you can return an error, and so I'd prefer for us to return an
error in that case.

>
> Still unsure about which approach looks more correct though.

I think I've made my point of view clear fwiw at least ;)

>
> --
> Oscar Salvador
> SUSE Labs
kernel test robot Aug. 5, 2024, 9:03 p.m. UTC | #10
Hi Oscar,

kernel test robot noticed the following build warnings:

[auto build test WARNING on s390/features]
[also build test WARNING on akpm-mm/mm-everything powerpc/next powerpc/fixes deller-parisc/for-next arnd-asm-generic/master linus/master v6.11-rc2 next-20240805]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Oscar-Salvador/mm-mmap-Teach-generic_get_unmapped_area-_topdown-to-handle-hugetlb-mappings/20240729-171449
base:   https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git features
patch link:    https://lore.kernel.org/r/20240729091018.2152-7-osalvador%40suse.de
patch subject: [PATCH v2 6/9] mm: Make hugetlb mappings go through mm_get_unmapped_area_vmflags
config: x86_64-randconfig-012-20240802 (https://download.01.org/0day-ci/archive/20240806/202408060456.yBmmX9hr-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240806/202408060456.yBmmX9hr-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408060456.yBmmX9hr-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> arch/x86/mm/hugetlbpage.c:84:1: warning: no previous prototype for 'hugetlb_get_unmapped_area' [-Wmissing-prototypes]
      84 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
         | ^~~~~~~~~~~~~~~~~~~~~~~~~


vim +/hugetlb_get_unmapped_area +84 arch/x86/mm/hugetlbpage.c

^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   82  
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   83  unsigned long
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  @84  hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   85  		unsigned long len, unsigned long pgoff, unsigned long flags)
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   86  {
39c11e6c05b7fe arch/x86/mm/hugetlbpage.c  Andi Kleen             2008-07-23   87  	struct hstate *h = hstate_file(file);
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   88  	struct mm_struct *mm = current->mm;
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   89  	struct vm_area_struct *vma;
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   90  
39c11e6c05b7fe arch/x86/mm/hugetlbpage.c  Andi Kleen             2008-07-23   91  	if (len & ~huge_page_mask(h))
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   92  		return -EINVAL;
44b04912fa7248 arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-07-17   93  
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   94  	if (len > TASK_SIZE)
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   95  		return -ENOMEM;
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16   96  
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15   97  	/* No address checking. See comment at mmap_address_hint_valid() */
5a8130f2b186ac arch/i386/mm/hugetlbpage.c Benjamin Herrenschmidt 2007-05-06   98  	if (flags & MAP_FIXED) {
a5516438959d90 arch/x86/mm/hugetlbpage.c  Andi Kleen             2008-07-23   99  		if (prepare_hugepage_range(file, addr, len))
5a8130f2b186ac arch/i386/mm/hugetlbpage.c Benjamin Herrenschmidt 2007-05-06  100  			return -EINVAL;
5a8130f2b186ac arch/i386/mm/hugetlbpage.c Benjamin Herrenschmidt 2007-05-06  101  		return addr;
5a8130f2b186ac arch/i386/mm/hugetlbpage.c Benjamin Herrenschmidt 2007-05-06  102  	}
5a8130f2b186ac arch/i386/mm/hugetlbpage.c Benjamin Herrenschmidt 2007-05-06  103  
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  104  	if (addr) {
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  105  		addr &= huge_page_mask(h);
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  106  		if (!mmap_address_hint_valid(addr, len))
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  107  			goto get_unmapped_area;
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  108  
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  109  		vma = find_vma(mm, addr);
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  110  		if (!vma || addr + len <= vm_start_gap(vma))
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  111  			return addr;
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  112  	}
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  113  
1e0f25dbf2464d arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2017-11-15  114  get_unmapped_area:
529ce23a764f25 arch/x86/mm/hugetlbpage.c  Rick Edgecombe         2024-03-25  115  	if (!test_bit(MMF_TOPDOWN, &mm->flags))
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  116  		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  117  				pgoff, flags);
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  118  	else
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  119  		return hugetlb_get_unmapped_area_topdown(file, addr, len,
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  120  				pgoff, flags);
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  121  }
fd8526ad14c182 arch/x86/mm/hugetlbpage.c  Kirill A. Shutemov     2013-11-19  122  #endif /* CONFIG_HUGETLB_PAGE */
^1da177e4c3f41 arch/i386/mm/hugetlbpage.c Linus Torvalds         2005-04-16  123
kernel test robot Aug. 11, 2024, 1:23 p.m. UTC | #11
Hello,

kernel test robot noticed "BUG:kernel_NULL_pointer_dereference,address" on:

commit: 535f03fb3da2b7b2fe5089ee5d1a291774a298e3 ("[PATCH v2 6/9] mm: Make hugetlb mappings go through mm_get_unmapped_area_vmflags")
url: https://github.com/intel-lab-lkp/linux/commits/Oscar-Salvador/mm-mmap-Teach-generic_get_unmapped_area-_topdown-to-handle-hugetlb-mappings/20240729-171449
base: https://git.kernel.org/cgit/linux/kernel/git/s390/linux.git features
patch link: https://lore.kernel.org/all/20240729091018.2152-7-osalvador@suse.de/
patch subject: [PATCH v2 6/9] mm: Make hugetlb mappings go through mm_get_unmapped_area_vmflags

in testcase: trinity
version: trinity-static-x86_64-x86_64-1c734c75-1_2020-01-06
with following parameters:

	runtime: 600s



compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+---------------------------------------------+------------+------------+
|                                             | ec3b0c2006 | 535f03fb3d |
+---------------------------------------------+------------+------------+
| boot_successes                              | 6          | 0          |
| boot_failures                               | 0          | 9          |
| BUG:kernel_NULL_pointer_dereference,address | 0          | 9          |
| Oops                                        | 0          | 9          |
| Kernel_panic-not_syncing:Fatal_exception    | 0          | 9          |
+---------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202408112137.e013a399-oliver.sang@intel.com


[   38.976763][  T448] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   38.977518][  T448] #PF: supervisor instruction fetch in kernel mode
[   38.977981][  T448] #PF: error_code(0x0010) - not-present page
[   38.978411][  T448] PGD 800000012c3cc067 P4D 800000012c3cc067 PUD 12c3cd067 PMD 0
[   38.978949][  T448] Oops: Oops: 0010 [#1] PREEMPT SMP PTI
[   38.979343][  T448] CPU: 1 UID: 0 PID: 448 Comm: trinity Not tainted 6.10.0-12075-g535f03fb3da2 #1
[   38.979990][  T448] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[   38.980725][  T448] RIP: 0010:0x0
[ 38.980993][ T448] Code: Unable to access opcode bytes at 0xffffffffffffffd6.

Code starting with the faulting instruction
===========================================
[   38.981530][  T448] RSP: 0018:ffffc9000108fb58 EFLAGS: 00010246
[   38.981977][  T448] RAX: 0000000000000000 RBX: 0000000000200000 RCX: 0000000000000000
[   38.982550][  T448] RDX: 0000000000200000 RSI: 0000000000000000 RDI: ffff88812ce36600
[   38.983124][  T448] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000073
[   38.983692][  T448] R10: fffffffffffffff4 R11: 0000000000000000 R12: ffff888319920000
[   38.984258][  T448] R13: 0000000000000003 R14: ffff88831996fe00 R15: ffff888114c42000
[   38.984826][  T448] FS:  000000000109a880(0000) GS:ffff88842fc00000(0000) knlGS:0000000000000000
[   38.985465][  T448] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   38.985942][  T448] CR2: ffffffffffffffd6 CR3: 0000000114c52000 CR4: 00000000000406b0
[   38.986515][  T448] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   38.988705][  T448] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   38.989257][  T448] Call Trace:
[   38.989493][  T448]  <TASK>
[ 38.989709][ T448] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) 
[ 38.990001][ T448] ? page_fault_oops (arch/x86/mm/fault.c:715) 
[ 38.990343][ T448] ? exc_page_fault (arch/x86/include/asm/irqflags.h:26 arch/x86/include/asm/irqflags.h:87 arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539) 
[ 38.990680][ T448] ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:623) 
[ 38.991046][ T448] __get_unmapped_area (mm/mmap.c:1932) 
[ 38.991458][ T448] ? find_held_lock (kernel/locking/lockdep.c:5249) 
[ 38.991796][ T448] ? do_shmat (include/linux/mmap_lock.h:122 ipc/shm.c:1643) 
[ 38.992098][ T448] do_mmap (mm/mmap.c:1325) 
[ 38.992394][ T448] ? do_shmat (include/linux/mmap_lock.h:122 ipc/shm.c:1643) 
[ 38.992712][ T448] do_shmat (ipc/shm.c:1658) 
[ 38.993016][ T448] __x64_sys_shmat (ipc/shm.c:1694 ipc/shm.c:1688 ipc/shm.c:1688) 
[ 38.993337][ T448] do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) 
[ 38.993670][ T448] ? lock_acquire (kernel/locking/lockdep.c:466 kernel/locking/lockdep.c:5761 kernel/locking/lockdep.c:5724) 
[ 38.994000][ T448] ? kvm_sched_clock_read (arch/x86/kernel/kvmclock.c:91) 
[ 38.994366][ T448] ? local_clock_noinstr (kernel/sched/clock.c:301) 
[ 38.994720][ T448] ? local_clock (arch/x86/include/asm/preempt.h:94 kernel/sched/clock.c:316) 
[ 38.995061][ T448] ? __lock_release+0x11a/0x290 
[ 38.995511][ T448] ? lock_release (kernel/locking/lockdep.c:466 kernel/locking/lockdep.c:5782) 
[ 38.995833][ T448] ? syscall_exit_to_user_mode_prepare (kernel/entry/common.c:199 (discriminator 1)) 
[ 38.996289][ T448] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4299 kernel/locking/lockdep.c:4358) 
[ 38.996715][ T448] ? syscall_exit_to_user_mode (arch/x86/include/asm/processor.h:702 arch/x86/include/asm/entry-common.h:91 include/linux/entry-common.h:364 kernel/entry/common.c:220) 
[ 38.997105][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 38.997436][ T448] ? do_shmat (ipc/shm.c:1680) 
[ 38.997749][ T448] ? syscall_exit_to_user_mode_prepare (kernel/entry/common.c:199 (discriminator 1)) 
[ 38.998189][ T448] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4299 kernel/locking/lockdep.c:4358) 
[ 38.998606][ T448] ? syscall_exit_to_user_mode (arch/x86/include/asm/processor.h:702 arch/x86/include/asm/entry-common.h:91 include/linux/entry-common.h:364 kernel/entry/common.c:220) 
[ 39.000911][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 39.001243][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 39.001574][ T448] ? syscall_exit_to_user_mode_prepare (kernel/entry/common.c:199 (discriminator 1)) 
[ 39.002016][ T448] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4299 kernel/locking/lockdep.c:4358) 
[ 39.002445][ T448] ? syscall_exit_to_user_mode (arch/x86/include/asm/processor.h:702 arch/x86/include/asm/entry-common.h:91 include/linux/entry-common.h:364 kernel/entry/common.c:220) 
[ 39.002840][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 39.003201][ T448] ? syscall_exit_to_user_mode_prepare (kernel/entry/common.c:199 (discriminator 1)) 
[ 39.003644][ T448] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4299 kernel/locking/lockdep.c:4358) 
[ 39.004049][ T448] ? syscall_exit_to_user_mode (arch/x86/include/asm/processor.h:702 arch/x86/include/asm/entry-common.h:91 include/linux/entry-common.h:364 kernel/entry/common.c:220) 
[ 39.004443][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 39.004767][ T448] ? do_syscall_64 (arch/x86/entry/common.c:102) 
[ 39.005087][ T448] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) 
[   39.005501][  T448] RIP: 0033:0x4648b7
[ 39.005775][ T448] Code: 00 66 90 b8 29 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 5d 46 00 00 c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 b8 1e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 0f 83 3d 46 00 00 c3 66 2e 0f 1f 84 00 00 00 00
All code
========
   0:	00 66 90             	add    %ah,-0x70(%rsi)
   3:	b8 29 00 00 00       	mov    $0x29,%eax
   8:	0f 05                	syscall
   a:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
  10:	0f 83 5d 46 00 00    	jae    0x4673
  16:	c3                   	ret
  17:	66 2e 0f 1f 84 00 00 	cs nopw 0x0(%rax,%rax,1)
  1e:	00 00 00 
  21:	66 90                	xchg   %ax,%ax
  23:	b8 1e 00 00 00       	mov    $0x1e,%eax
  28:	0f 05                	syscall
  2a:*	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax		<-- trapping instruction
  30:	0f 83 3d 46 00 00    	jae    0x4673
  36:	c3                   	ret
  37:	66                   	data16
  38:	2e                   	cs
  39:	0f                   	.byte 0xf
  3a:	1f                   	(bad)
  3b:	84 00                	test   %al,(%rax)
  3d:	00 00                	add    %al,(%rax)
	...

Code starting with the faulting instruction
===========================================
   0:	48 3d 01 f0 ff ff    	cmp    $0xfffffffffffff001,%rax
   6:	0f 83 3d 46 00 00    	jae    0x4649
   c:	c3                   	ret
   d:	66                   	data16
   e:	2e                   	cs
   f:	0f                   	.byte 0xf
  10:	1f                   	(bad)
  11:	84 00                	test   %al,(%rax)
  13:	00 00                	add    %al,(%rax)
	...
[   39.007086][  T448] RSP: 002b:00007ffe45cb7dc8 EFLAGS: 00000246 ORIG_RAX: 000000000000001e
[   39.007608][  T448] RAX: ffffffffffffffda RBX: 0000000000007000 RCX: 00000000004648b7
[   39.008147][  T448] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000002
[   39.008683][  T448] RBP: 00007ffe45cb7df0 R08: 00000000010975f0 R09: 000000000109a880
[   39.009185][  T448] R10: ffffffffffffffff R11: 0000000000000246 R12: 0000000000001000
[   39.009547][  T448] R13: 0000000000000002 R14: 00000000010b0e20 R15: 0000000054001fb0
[   39.009912][  T448]  </TASK>
[   39.010094][  T448] Modules linked in: polyval_clmulni polyval_generic ghash_clmulni_intel intel_agp intel_gtt
[   39.010801][  T448] CR2: 0000000000000000
[   39.011091][  T448] ---[ end trace 0000000000000000 ]---
[   39.011399][  T448] RIP: 0010:0x0
[ 39.011649][ T448] Code: Unable to access opcode bytes at 0xffffffffffffffd6.

Code starting with the faulting instruction
===========================================


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240811/202408112137.e013a399-oliver.sang@intel.com
diff mbox series

Patch

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9f6cff356796..5d47a2785a5d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -258,15 +258,22 @@  generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 			pgoff, flags);
 }
 
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-			  unsigned long len, unsigned long pgoff,
-			  unsigned long flags)
+unsigned long
+hugetlb_mmap_check_and_align(struct file *file, unsigned long addr,
+			     unsigned long len, unsigned long flags)
 {
-	return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+	unsigned long addr0 = 0;
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if ((flags & MAP_FIXED) && prepare_hugepage_range(file, addr, len))
+		return -EINVAL;
+	if (addr)
+		addr0 = ALIGN(addr, huge_page_size(h));
+
+	return addr0;
 }
-#endif
 
 /*
  * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
@@ -1300,7 +1307,6 @@  static const struct file_operations hugetlbfs_file_operations = {
 	.read_iter		= hugetlbfs_read_iter,
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= noop_fsync,
-	.get_unmapped_area	= hugetlb_get_unmapped_area,
 	.llseek			= default_llseek,
 	.fallocate		= hugetlbfs_fallocate,
 	.fop_flags		= FOP_HUGE_PAGES,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0ec14e5e0890..1413cdcfdb1a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -549,11 +549,9 @@  static inline struct hstate *hstate_inode(struct inode *i)
 }
 #endif /* !CONFIG_HUGETLBFS */
 
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-					unsigned long len, unsigned long pgoff,
-					unsigned long flags);
-#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
+unsigned long
+hugetlb_mmap_check_and_align(struct file *file, unsigned long addr,
+			     unsigned long len, unsigned long flags);
 
 unsigned long
 generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
diff --git a/mm/mmap.c b/mm/mmap.c
index 7b623811d82a..f755d8a298c5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -849,6 +849,7 @@  __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 				  unsigned long, unsigned long, unsigned long)
 				  = NULL;
 
+	bool is_hugetlb = false;
 	unsigned long error = arch_mmap_check(addr, len, flags);
 	if (error)
 		return error;
@@ -857,6 +858,9 @@  __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	if (file && is_file_hugepages(file))
+		is_hugetlb = true;
+
 	if (file) {
 		if (file->f_op->get_unmapped_area)
 			get_area = file->f_op->get_unmapped_area;
@@ -874,11 +878,20 @@  __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 
 	if (get_area) {
 		addr = get_area(file, addr, len, pgoff, flags);
-	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+	} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && !is_hugetlb) {
 		/* Ensures that larger anonymous mappings are THP aligned. */
 		addr = thp_get_unmapped_area_vmflags(file, addr, len,
 						     pgoff, flags, vm_flags);
 	} else {
+		/*
+		 * Consolidate hugepages checks in one place, and also align addr
+		 * to hugepage size.
+		 */
+		if (is_hugetlb) {
+			addr = hugetlb_mmap_check_and_align(file, addr, len, flags);
+			if (IS_ERR_VALUE(addr))
+				return addr;
+		}
 		addr = mm_get_unmapped_area_vmflags(current->mm, file, addr, len,
 						    pgoff, flags, vm_flags);
 	}