Message ID | 1521017305-28518-4-git-send-email-cpandya@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 14/03/18 08:48, Chintan Pandya wrote: > While setting huge page, we need to take care of > previously existing next level mapping. Since, > we are going to overrite previous mapping, the > only reference to next level page table will get > lost and the next level page table will be zombie, > occupying space forever. So, free it before > overriding. > > Signed-off-by: Chintan Pandya <cpandya@codeaurora.org> > --- > arch/arm64/mm/mmu.c | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 8c704f1..c0df264 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -32,7 +32,7 @@ > #include <linux/io.h> > #include <linux/mm.h> > #include <linux/vmalloc.h> > - > +#include <linux/hugetlb.h> > #include <asm/barrier.h> > #include <asm/cputype.h> > #include <asm/fixmap.h> > @@ -45,6 +45,7 @@ > #include <asm/memblock.h> > #include <asm/mmu_context.h> > #include <asm/ptdump.h> > +#include <asm/page.h> > > #define NO_BLOCK_MAPPINGS BIT(0) > #define NO_CONT_MAPPINGS BIT(1) > @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) > return 0; > > BUG_ON(phys & ~PUD_MASK); > + if (pud_val(*pud) && !pud_huge(*pud)) > + free_page((unsigned long)__va(pud_val(*pud))); > + This is absolutely scary. Isn't this page still referenced in the page tables (assuming patch 4 has been applied too)? > set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); > return 1; > } > @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) > return 0; > > BUG_ON(phys & ~PMD_MASK); > + if (pmd_val(*pmd) && !pmd_huge(*pmd)) > + free_page((unsigned long)__va(pmd_val(*pmd))); > + > set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); > return 1; > } > Thanks, M.
On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote: > While setting huge page, we need to take care of > previously existing next level mapping. Since, > we are going to overrite previous mapping, the > only reference to next level page table will get > lost and the next level page table will be zombie, > occupying space forever. So, free it before > overriding. > @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) > return 0; > > BUG_ON(phys & ~PUD_MASK); > + if (pud_val(*pud) && !pud_huge(*pud)) > + free_page((unsigned long)__va(pud_val(*pud))); > + > set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); > return 1; > } > @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) > return 0; > > BUG_ON(phys & ~PMD_MASK); > + if (pmd_val(*pmd) && !pmd_huge(*pmd)) > + free_page((unsigned long)__va(pmd_val(*pmd))); > + As Marc noted, (assuming the subsequent revert is applied) in both of these cases, these tables are still live, and thus it is not safe to free them. Consider that immediately after freeing the pages, they may get re-allocated elsewhere, where they may be modified. If this happens before TLB invalidation, page table walks may allocate junk into TLBs, resulting in a number of problems. It is *never* safe to free a live page table, therefore NAK to this patch. Thanks, Mark.
On 3/14/2018 4:23 PM, Mark Rutland wrote: > On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote: >> While setting huge page, we need to take care of >> previously existing next level mapping. Since, >> we are going to overrite previous mapping, the >> only reference to next level page table will get >> lost and the next level page table will be zombie, >> occupying space forever. So, free it before >> overriding. > >> @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) >> return 0; >> >> BUG_ON(phys & ~PUD_MASK); >> + if (pud_val(*pud) && !pud_huge(*pud)) >> + free_page((unsigned long)__va(pud_val(*pud))); >> + >> set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); >> return 1; >> } >> @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) >> return 0; >> >> BUG_ON(phys & ~PMD_MASK); >> + if (pmd_val(*pmd) && !pmd_huge(*pmd)) >> + free_page((unsigned long)__va(pmd_val(*pmd))); >> + > > As Marc noted, (assuming the subsequent revert is applied) in both of > these cases, these tables are still live, and thus it is not safe to > free them. > > Consider that immediately after freeing the pages, they may get > re-allocated elsewhere, where they may be modified. If this happens > before TLB invalidation, page table walks may allocate junk into TLBs, > resulting in a number of problems. Ah okay. What about this sequence, 1) I store old PMD/PUD values 2) Update the PMD/PUD with section mapping 3) Invalidate TLB 4) Then free the *leaked* page > > It is *never* safe to free a live page table, therefore NAK to this > patch. > > Thanks, > Mark. > Chintan
On Wed, Mar 14, 2018 at 04:57:29PM +0530, Chintan Pandya wrote: > > > On 3/14/2018 4:23 PM, Mark Rutland wrote: > > On Wed, Mar 14, 2018 at 02:18:24PM +0530, Chintan Pandya wrote: > > > While setting huge page, we need to take care of > > > previously existing next level mapping. Since, > > > we are going to overrite previous mapping, the > > > only reference to next level page table will get > > > lost and the next level page table will be zombie, > > > occupying space forever. So, free it before > > > overriding. > > > > > @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) > > > return 0; > > > BUG_ON(phys & ~PUD_MASK); > > > + if (pud_val(*pud) && !pud_huge(*pud)) > > > + free_page((unsigned long)__va(pud_val(*pud))); > > > + > > > set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); > > > return 1; > > > } > > > @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) > > > return 0; > > > BUG_ON(phys & ~PMD_MASK); > > > + if (pmd_val(*pmd) && !pmd_huge(*pmd)) > > > + free_page((unsigned long)__va(pmd_val(*pmd))); > > > + > > > > As Marc noted, (assuming the subsequent revert is applied) in both of > > these cases, these tables are still live, and thus it is not safe to > > free them. > > > > Consider that immediately after freeing the pages, they may get > > re-allocated elsewhere, where they may be modified. If this happens > > before TLB invalidation, page table walks may allocate junk into TLBs, > > resulting in a number of problems. > Ah okay. What about this sequence, > 1) I store old PMD/PUD values > 2) Update the PMD/PUD with section mapping > 3) Invalidate TLB > 4) Then free the *leaked* page You must invalidate the TLB *before* setting the new entry: 1) store the old entry value 2) clear the entry 3) invalidate the TLB ... then you can either: 4) update the entry 5) free the old table ... or: 4) free the old table 5) update the entry Thanks, Mark.
Hi Chintan, Thank you for the patch! Yet something to improve: [auto build test ERROR on linus/master] [also build test ERROR on v4.16-rc5] [cannot apply to next-20180316] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Chintan-Pandya/Fix-issues-with-huge-mapping-in-ioremap/20180316-132223 config: arm64-allmodconfig (attached as .config) compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree make.cross ARCH=arm64 All error/warnings (new ones prefixed by >>): In file included from arch/arm64/include/asm/page.h:27:0, from include/linux/shm.h:6, from include/linux/sched.h:16, from include/linux/sched/task_stack.h:9, from include/linux/elfcore.h:7, from include/linux/crash_core.h:6, from include/linux/kexec.h:18, from arch/arm64/mm/mmu.c:26: arch/arm64/mm/mmu.c: In function 'pud_set_huge': >> arch/arm64/mm/mmu.c:943:15: error: 'pud' undeclared (first use in this function); did you mean 'pudp'? if (pud_val(*pud) && !pud_huge(*pud)) ^ arch/arm64/include/asm/pgtable-types.h:50:22: note: in definition of macro 'pgd_val' #define pgd_val(x) ((x).pgd) ^ >> arch/arm64/mm/mmu.c:943:6: note: in expansion of macro 'pud_val' if (pud_val(*pud) && !pud_huge(*pud)) ^~~~~~~ arch/arm64/mm/mmu.c:943:15: note: each undeclared identifier is reported only once for each function it appears in if (pud_val(*pud) && !pud_huge(*pud)) ^ arch/arm64/include/asm/pgtable-types.h:50:22: note: in definition of macro 'pgd_val' #define pgd_val(x) ((x).pgd) ^ >> arch/arm64/mm/mmu.c:943:6: note: in expansion of macro 'pud_val' if (pud_val(*pud) && !pud_huge(*pud)) ^~~~~~~ arch/arm64/mm/mmu.c: In function 'pmd_set_huge': >> arch/arm64/mm/mmu.c:960:15: error: 'pmd' undeclared (first use in this function); did you mean 'pmdp'? 
if (pmd_val(*pmd) && !pmd_huge(*pmd)) ^ arch/arm64/include/asm/pgtable-types.h:39:22: note: in definition of macro 'pmd_val' #define pmd_val(x) ((x).pmd) ^ vim +943 arch/arm64/mm/mmu.c 932 933 int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) 934 { 935 pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | 936 pgprot_val(mk_sect_prot(prot))); 937 938 /* ioremap_page_range doesn't honour BBM */ 939 if (pud_present(READ_ONCE(*pudp))) 940 return 0; 941 942 BUG_ON(phys & ~PUD_MASK); > 943 if (pud_val(*pud) && !pud_huge(*pud)) 944 free_page((unsigned long)__va(pud_val(*pud))); 945 946 set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); 947 return 1; 948 } 949 950 int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) 951 { 952 pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | 953 pgprot_val(mk_sect_prot(prot))); 954 955 /* ioremap_page_range doesn't honour BBM */ 956 if (pmd_present(READ_ONCE(*pmdp))) 957 return 0; 958 959 BUG_ON(phys & ~PMD_MASK); > 960 if (pmd_val(*pmd) && !pmd_huge(*pmd)) 961 free_page((unsigned long)__va(pmd_val(*pmd))); 962 963 set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); 964 return 1; 965 } 966 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8c704f1..c0df264 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,7 +32,7 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/vmalloc.h> - +#include <linux/hugetlb.h> #include <asm/barrier.h> #include <asm/cputype.h> #include <asm/fixmap.h> @@ -45,6 +45,7 @@ #include <asm/memblock.h> #include <asm/mmu_context.h> #include <asm/ptdump.h> +#include <asm/page.h> #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) @@ -939,6 +940,9 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) return 0; BUG_ON(phys & ~PUD_MASK); + if (pud_val(*pud) && !pud_huge(*pud)) + free_page((unsigned long)__va(pud_val(*pud))); + set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); return 1; } @@ -953,6 +957,9 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) return 0; BUG_ON(phys & ~PMD_MASK); + if (pmd_val(*pmd) && !pmd_huge(*pmd)) + free_page((unsigned long)__va(pmd_val(*pmd))); + set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); return 1; }
While setting a huge page, we need to take care of the previously existing next-level mapping. Since we are going to overwrite the previous mapping, the only reference to the next-level page table will get lost and the next-level page table will become a zombie, occupying space forever. So, free it before overriding. Signed-off-by: Chintan Pandya <cpandya@codeaurora.org> --- arch/arm64/mm/mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)