diff mbox

[v1,3/4] arm64: Fix the page leak in pud/pmd_set_huge

Message ID 1521017305-28518-4-git-send-email-cpandya@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Chintan Pandya March 14, 2018, 8:48 a.m. UTC
While setting a huge page, we need to take care of
a previously existing next-level mapping. Since
we are going to overwrite the previous mapping, the
only reference to the next-level page table will get
lost and the next-level page table will become a
zombie, occupying space forever. So, free it before
overriding.

Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
---
 arch/arm64/mm/mmu.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Marc Zyngier March 14, 2018, 10:35 a.m. UTC | #1
On 14/03/18 08:48, Chintan Pandya wrote:
> While setting huge page, we need to take care of
> previously existing next level mapping. Since,
> we are going to overrite previous mapping, the
> only reference to next level page table will get
> lost and the next level page table will be zombie,
> occupying space forever. So, free it before
> overriding.
> 
> Signed-off-by: Chintan Pandya <cpandya@codeaurora.org>
> ---
>  arch/arm64/mm/mmu.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 8c704f1..c0df264 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -32,7 +32,7 @@
>  #include <linux/io.h>
>  #include <linux/mm.h>
>  #include <linux/vmalloc.h>
> -
> +#include <linux/hugetlb.h>
>  #include <asm/barrier.h>
>  #include <asm/cputype.h>
>  #include <asm/fixmap.h>
> @@ -45,6 +45,7 @@
>  #include <asm/memblock.h>
>  #include <asm/mmu_context.h>
>  #include <asm/ptdump.h>
> +#include <asm/page.h>
>  
>  #define NO_BLOCK_MAPPINGS	BIT(0)
>  #define NO_CONT_MAPPINGS	BIT(1)
> @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
>  		return 0;
>  
>  	BUG_ON(phys & ~PUD_MASK);
> +	if (pud_val(*pud) && !pud_huge(*pud))
> +		free_page((unsigned long)__va(pud_val(*pud)));
> +

This is absolutely scary. Isn't this page still referenced in the page
tables (assuming patch 4 has been applied too)?

>  	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
>  	return 1;
>  }
> @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
>  		return 0;
>  
>  	BUG_ON(phys & ~PMD_MASK);
> +	if (pmd_val(*pmd) && !pmd_huge(*pmd))
> +		free_page((unsigned long)__va(pmd_val(*pmd)));
> +
>  	set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
>  	return 1;
>  }
> 

Thanks,

	M.
Mark Rutland March 14, 2018, 10:53 a.m. UTC | #2
On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote:
> While setting huge page, we need to take care of
> previously existing next level mapping. Since,
> we are going to overrite previous mapping, the
> only reference to next level page table will get
> lost and the next level page table will be zombie,
> occupying space forever. So, free it before
> overriding.

> @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
>  		return 0;
>  
>  	BUG_ON(phys & ~PUD_MASK);
> +	if (pud_val(*pud) && !pud_huge(*pud))
> +		free_page((unsigned long)__va(pud_val(*pud)));
> +
>  	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
>  	return 1;
>  }
> @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
>  		return 0;
>  
>  	BUG_ON(phys & ~PMD_MASK);
> +	if (pmd_val(*pmd) && !pmd_huge(*pmd))
> +		free_page((unsigned long)__va(pmd_val(*pmd)));
> +

As Marc noted, (assuming the subsequent revert is applied) in both of
these cases, these tables are still live, and thus it is not safe to
free them.

Consider that immediately after freeing the pages, they may get
re-allocated elsewhere, where they may be modified. If this happens
before TLB invalidation, page table walks may allocate junk into TLBs,
resulting in a number of problems.

It is *never* safe to free a live page table, therefore NAK to this
patch.

Thanks,
Mark.
Chintan Pandya March 14, 2018, 11:27 a.m. UTC | #3
On 3/14/2018 4:23 PM, Mark Rutland wrote:
> On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote:
>> While setting huge page, we need to take care of
>> previously existing next level mapping. Since,
>> we are going to overrite previous mapping, the
>> only reference to next level page table will get
>> lost and the next level page table will be zombie,
>> occupying space forever. So, free it before
>> overriding.
> 
>> @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
>>   		return 0;
>>   
>>   	BUG_ON(phys & ~PUD_MASK);
>> +	if (pud_val(*pud) && !pud_huge(*pud))
>> +		free_page((unsigned long)__va(pud_val(*pud)));
>> +
>>   	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
>>   	return 1;
>>   }
>> @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
>>   		return 0;
>>   
>>   	BUG_ON(phys & ~PMD_MASK);
>> +	if (pmd_val(*pmd) && !pmd_huge(*pmd))
>> +		free_page((unsigned long)__va(pmd_val(*pmd)));
>> +
> 
> As Marc noted, (assuming the subsequent revert is applied) in both of
> these cases, these tables are still live, and thus it is not safe to
> free them.
> 
> Consider that immediately after freeing the pages, they may get
> re-allocated elsewhere, where they may be modified. If this happens
> before TLB invalidation, page table walks may allocate junk into TLBs,
> resulting in a number of problems.
Ah okay. What about this sequence,
1) I store old PMD/PUD values
2) Update the PMD/PUD with section mapping
3) Invalidate TLB
4) Then free the *leaked* page

> 
> It is *never* safe to free a live page table, therefore NAK to this
> patch.
> 
> Thanks,
> Mark.
> 

Chintan
Mark Rutland March 14, 2018, 11:50 a.m. UTC | #4
On Wed, Mar 14, 2018 at 04:57:29PM +0530, Chintan Pandya wrote:
> 
> 
> On 3/14/2018 4:23 PM, Mark Rutland wrote:
> > On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote:
> > > While setting huge page, we need to take care of
> > > previously existing next level mapping. Since,
> > > we are going to overrite previous mapping, the
> > > only reference to next level page table will get
> > > lost and the next level page table will be zombie,
> > > occupying space forever. So, free it before
> > > overriding.
> > 
> > > @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
> > >   		return 0;
> > >   	BUG_ON(phys & ~PUD_MASK);
> > > +	if (pud_val(*pud) && !pud_huge(*pud))
> > > +		free_page((unsigned long)__va(pud_val(*pud)));
> > > +
> > >   	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
> > >   	return 1;
> > >   }
> > > @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
> > >   		return 0;
> > >   	BUG_ON(phys & ~PMD_MASK);
> > > +	if (pmd_val(*pmd) && !pmd_huge(*pmd))
> > > +		free_page((unsigned long)__va(pmd_val(*pmd)));
> > > +
> > 
> > As Marc noted, (assuming the subsequent revert is applied) in both of
> > these cases, these tables are still live, and thus it is not safe to
> > free them.
> > 
> > Consider that immediately after freeing the pages, they may get
> > re-allocated elsewhere, where they may be modified. If this happens
> > before TLB invalidation, page table walks may allocate junk into TLBs,
> > resulting in a number of problems.
> Ah okay. What about this sequence,
> 1) I store old PMD/PUD values
> 2) Update the PMD/PUD with section mapping
> 3) Invalidate TLB
> 4) Then free the *leaked* page

You must invalidate the TLB *before* setting the new entry:

1) store the old entry value
2) clear the entry
3) invalidate the TLB

... then you can either:

4) update the entry
5) free the old table

... or:

4) free the old table
5) update the entry

Thanks,
Mark.
kernel test robot March 16, 2018, 2:50 p.m. UTC | #5
Hi Chintan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.16-rc5]
[cannot apply to next-20180316]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chintan-Pandya/Fix-issues-with-huge-mapping-in-ioremap/20180316-132223
config: arm64-allmodconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm64 

All error/warnings (new ones prefixed by >>):

   In file included from arch/arm64/include/asm/page.h:27:0,
                    from include/linux/shm.h:6,
                    from include/linux/sched.h:16,
                    from include/linux/sched/task_stack.h:9,
                    from include/linux/elfcore.h:7,
                    from include/linux/crash_core.h:6,
                    from include/linux/kexec.h:18,
                    from arch/arm64/mm/mmu.c:26:
   arch/arm64/mm/mmu.c: In function 'pud_set_huge':
>> arch/arm64/mm/mmu.c:943:15: error: 'pud' undeclared (first use in this function); did you mean 'pudp'?
     if (pud_val(*pud) && !pud_huge(*pud))
                  ^
   arch/arm64/include/asm/pgtable-types.h:50:22: note: in definition of macro 'pgd_val'
    #define pgd_val(x) ((x).pgd)
                         ^
>> arch/arm64/mm/mmu.c:943:6: note: in expansion of macro 'pud_val'
     if (pud_val(*pud) && !pud_huge(*pud))
         ^~~~~~~
   arch/arm64/mm/mmu.c:943:15: note: each undeclared identifier is reported only once for each function it appears in
     if (pud_val(*pud) && !pud_huge(*pud))
                  ^
   arch/arm64/include/asm/pgtable-types.h:50:22: note: in definition of macro 'pgd_val'
    #define pgd_val(x) ((x).pgd)
                         ^
>> arch/arm64/mm/mmu.c:943:6: note: in expansion of macro 'pud_val'
     if (pud_val(*pud) && !pud_huge(*pud))
         ^~~~~~~
   arch/arm64/mm/mmu.c: In function 'pmd_set_huge':
>> arch/arm64/mm/mmu.c:960:15: error: 'pmd' undeclared (first use in this function); did you mean 'pmdp'?
     if (pmd_val(*pmd) && !pmd_huge(*pmd))
                  ^
   arch/arm64/include/asm/pgtable-types.h:39:22: note: in definition of macro 'pmd_val'
    #define pmd_val(x) ((x).pmd)
                         ^

vim +943 arch/arm64/mm/mmu.c

   932	
   933	int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
   934	{
   935		pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
   936						pgprot_val(mk_sect_prot(prot)));
   937	
   938		/* ioremap_page_range doesn't honour BBM */
   939		if (pud_present(READ_ONCE(*pudp)))
   940			return 0;
   941	
   942		BUG_ON(phys & ~PUD_MASK);
 > 943		if (pud_val(*pud) && !pud_huge(*pud))
   944			free_page((unsigned long)__va(pud_val(*pud)));
   945	
   946		set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
   947		return 1;
   948	}
   949	
   950	int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
   951	{
   952		pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
   953						pgprot_val(mk_sect_prot(prot)));
   954	
   955		/* ioremap_page_range doesn't honour BBM */
   956		if (pmd_present(READ_ONCE(*pmdp)))
   957			return 0;
   958	
   959		BUG_ON(phys & ~PMD_MASK);
 > 960		if (pmd_val(*pmd) && !pmd_huge(*pmd))
   961			free_page((unsigned long)__va(pmd_val(*pmd)));
   962	
   963		set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
   964		return 1;
   965	}
   966	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8c704f1..c0df264 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,7 +32,7 @@ 
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
-
+#include <linux/hugetlb.h>
 #include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
@@ -45,6 +45,7 @@ 
 #include <asm/memblock.h>
 #include <asm/mmu_context.h>
 #include <asm/ptdump.h>
+#include <asm/page.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
@@ -939,6 +940,9 @@  int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 		return 0;
 
 	BUG_ON(phys & ~PUD_MASK);
+	if (pud_val(*pud) && !pud_huge(*pud))
+		free_page((unsigned long)__va(pud_val(*pud)));
+
 	set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }
@@ -953,6 +957,9 @@  int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 		return 0;
 
 	BUG_ON(phys & ~PMD_MASK);
+	if (pmd_val(*pmd) && !pmd_huge(*pmd))
+		free_page((unsigned long)__va(pmd_val(*pmd)));
+
 	set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
 	return 1;
 }