Message ID | 20250304222018.615808-4-yang@os.amperecomputing.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | arm64: support FEAT_BBM level 2 and large block mapping when rodata=full | expand |
On 04/03/2025 22:19, Yang Shi wrote: > The later patch will enhance __create_pgd_mapping() and related helpers > to split kernel linear mapping, it requires have return value. So make > __create_pgd_mapping() and helpers non-void functions. > > And move the BUG_ON() out of page table alloc helper since failing > splitting kernel linear mapping is not fatal and can be handled by the > callers in the later patch. Have BUG_ON() after > __create_pgd_mapping_locked() returns to keep the current callers behavior > intact. > > Suggested-by: Ryan Roberts <ryan.roberts@arm.com> > Signed-off-by: Yang Shi <yang@os.amperecomputing.com> > --- > arch/arm64/mm/mmu.c | 127 ++++++++++++++++++++++++++++++-------------- > 1 file changed, 86 insertions(+), 41 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index b4df5bc5b1b8..dccf0877285b 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -189,11 +189,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, > } while (ptep++, addr += PAGE_SIZE, addr != end); > } > > -static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, > - unsigned long end, phys_addr_t phys, > - pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), > - int flags) > +static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, > + unsigned long end, phys_addr_t phys, > + pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), > + int flags) > { > unsigned long next; > pmd_t pmd = READ_ONCE(*pmdp); > @@ -208,6 +208,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, > pmdval |= PMD_TABLE_PXN; > BUG_ON(!pgtable_alloc); > pte_phys = pgtable_alloc(PAGE_SHIFT); > + if (!pte_phys) > + return -ENOMEM; nit: personally I'd prefer to see a "goto out" and funnel all to a single return statement. You do that in some functions (via loop break), but would be cleaner if consistent. 
If pgtable_alloc() is modified to return int (see my comment at the bottom), this becomes: ret = pgtable_alloc(PAGE_SHIFT, &pte_phys); if (ret) goto out; > ptep = pte_set_fixmap(pte_phys); > init_clear_pgtable(ptep); > ptep += pte_index(addr); > @@ -239,13 +241,16 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, > * walker. > */ > pte_clear_fixmap(); > + > + return 0; > } > > -static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, > - phys_addr_t phys, pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), int flags) > +static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, > + phys_addr_t phys, pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), int flags) > { > unsigned long next; > + int ret = 0; > > do { > pmd_t old_pmd = READ_ONCE(*pmdp); > @@ -264,22 +269,27 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, > BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), > READ_ONCE(pmd_val(*pmdp)))); > } else { > - alloc_init_cont_pte(pmdp, addr, next, phys, prot, > + ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot, > pgtable_alloc, flags); > + if (ret) > + break; > > BUG_ON(pmd_val(old_pmd) != 0 && > pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); > } > phys += next - addr; > } while (pmdp++, addr = next, addr != end); > + > + return ret; > } > > -static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, > - unsigned long end, phys_addr_t phys, > - pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), int flags) > +static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, > + unsigned long end, phys_addr_t phys, > + pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), int flags) > { > unsigned long next; > + int ret = 0; > pud_t pud = READ_ONCE(*pudp); > pmd_t *pmdp; > > @@ -295,6 +305,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, > pudval |= PUD_TABLE_PXN; > BUG_ON(!pgtable_alloc); > pmd_phys = pgtable_alloc(PMD_SHIFT); > + if (!pmd_phys) > + 
return -ENOMEM; > pmdp = pmd_set_fixmap(pmd_phys); > init_clear_pgtable(pmdp); > pmdp += pmd_index(addr); > @@ -314,21 +326,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, > (flags & NO_CONT_MAPPINGS) == 0) > __prot = __pgprot(pgprot_val(prot) | PTE_CONT); > > - init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); > + ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); > + if (ret) > + break; > > pmdp += pmd_index(next) - pmd_index(addr); > phys += next - addr; > } while (addr = next, addr != end); > > pmd_clear_fixmap(); > + > + return ret; > } > > -static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > - phys_addr_t phys, pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), > - int flags) > +static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > + phys_addr_t phys, pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), > + int flags) > { > unsigned long next; > + int ret = 0; > p4d_t p4d = READ_ONCE(*p4dp); > pud_t *pudp; > > @@ -340,6 +357,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > p4dval |= P4D_TABLE_PXN; > BUG_ON(!pgtable_alloc); > pud_phys = pgtable_alloc(PUD_SHIFT); > + if (!pud_phys) > + return -ENOMEM; > pudp = pud_set_fixmap(pud_phys); > init_clear_pgtable(pudp); > pudp += pud_index(addr); > @@ -369,8 +388,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), > READ_ONCE(pud_val(*pudp)))); > } else { > - alloc_init_cont_pmd(pudp, addr, next, phys, prot, > + ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot, > pgtable_alloc, flags); > + if (ret) > + break; > > BUG_ON(pud_val(old_pud) != 0 && > pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); > @@ -379,14 +400,17 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, > } while (pudp++, addr = next, addr != end); > > pud_clear_fixmap(); > + > + return 
ret; > } > > -static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, > - phys_addr_t phys, pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), > - int flags) > +static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, > + phys_addr_t phys, pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), > + int flags) > { > unsigned long next; > + int ret = 0; > pgd_t pgd = READ_ONCE(*pgdp); > p4d_t *p4dp; > > @@ -398,6 +422,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, > pgdval |= PGD_TABLE_PXN; > BUG_ON(!pgtable_alloc); > p4d_phys = pgtable_alloc(P4D_SHIFT); > + if (!p4d_phys) > + return -ENOMEM; > p4dp = p4d_set_fixmap(p4d_phys); > init_clear_pgtable(p4dp); > p4dp += p4d_index(addr); > @@ -412,8 +438,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, > > next = p4d_addr_end(addr, end); > > - alloc_init_pud(p4dp, addr, next, phys, prot, > + ret = alloc_init_pud(p4dp, addr, next, phys, prot, > pgtable_alloc, flags); > + if (ret) > + break; > > BUG_ON(p4d_val(old_p4d) != 0 && > p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); > @@ -422,23 +450,26 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, > } while (p4dp++, addr = next, addr != end); > > p4d_clear_fixmap(); > + > + return ret; > } > > -static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, > - unsigned long virt, phys_addr_t size, > - pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), > - int flags) > +static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, > + unsigned long virt, phys_addr_t size, > + pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), > + int flags) > { > unsigned long addr, end, next; > pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); > + int ret = 0; > > /* > * If the virtual and physical address don't have the same offset > * within a page, we cannot map the region as the caller expects. 
> */ > if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) > - return; > + return -EINVAL; > > phys &= PAGE_MASK; > addr = virt & PAGE_MASK; > @@ -446,29 +477,38 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, > > do { > next = pgd_addr_end(addr, end); > - alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, > + ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, > flags); > + if (ret) > + break; > phys += next - addr; > } while (pgdp++, addr = next, addr != end); > + > + return ret; > } > > -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, > - unsigned long virt, phys_addr_t size, > - pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), > - int flags) > +static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, > + unsigned long virt, phys_addr_t size, > + pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), > + int flags) > { > + int ret; > + > mutex_lock(&fixmap_lock); > - __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, > + ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, > pgtable_alloc, flags); > + BUG_ON(ret); This function now returns an error, but also BUGs on ret!=0. For this patch, I'd suggest keeping this function as void. But I believe there is a pre-existing bug in arch_add_memory(). That's called at runtime so if __create_pgd_mapping() fails and BUGs, it will take down a running system. With this foundational patch, we can fix that with an additional patch to pass along the error code instead of BUGing in that case. arch_add_memory() would need to unwind whatever __create_pgd_mapping() managed to do before the memory allocation failure (presumably unmapping and freeing any allocated tables). I'm happy to do this as a follow up patch. 
> mutex_unlock(&fixmap_lock); > + > + return ret; > } > > #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 > extern __alias(__create_pgd_mapping_locked) > -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, > - phys_addr_t size, pgprot_t prot, > - phys_addr_t (*pgtable_alloc)(int), int flags); > +int create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, > + phys_addr_t size, pgprot_t prot, > + phys_addr_t (*pgtable_alloc)(int), int flags); create_kpti_ng_temp_pgd() now returns error instead of BUGing on allocation failure, but I don't see a change to handle that error. You'll want to update __kpti_install_ng_mappings() to BUG on error. > #endif > > static phys_addr_t __pgd_pgtable_alloc(int shift) > @@ -476,13 +516,17 @@ static phys_addr_t __pgd_pgtable_alloc(int shift) > /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ > void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); > > - BUG_ON(!ptr); > + if (!ptr) > + return 0; 0 is a valid (though unlikely) physical address. I guess you could technically encode like ERR_PTR(), but since you are returning phys_addr_t and not a pointer, then perhaps it will be clearer to make this return int and accept a pointer to a phys_addr_t, which it will populate on success? > + > return __pa(ptr); > } > > static phys_addr_t pgd_pgtable_alloc(int shift) > { > phys_addr_t pa = __pgd_pgtable_alloc(shift); > + if (!pa) > + goto out; This would obviously need to be fixed up as per above. > struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); > > /* > @@ -498,6 +542,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) > else if (shift == PMD_SHIFT) > BUG_ON(!pagetable_pmd_ctor(ptdesc)); > > +out: > return pa; > } > You have left early_pgtable_alloc() to panic() on allocation failure. Given we can now unwind the stack with error code, I think it would be more consistent to also allow early_pgtable_alloc() to return error. Thanks, Ryan
On 3/14/25 4:51 AM, Ryan Roberts wrote: > On 04/03/2025 22:19, Yang Shi wrote: >> The later patch will enhance __create_pgd_mapping() and related helpers >> to split kernel linear mapping, it requires have return value. So make >> __create_pgd_mapping() and helpers non-void functions. >> >> And move the BUG_ON() out of page table alloc helper since failing >> splitting kernel linear mapping is not fatal and can be handled by the >> callers in the later patch. Have BUG_ON() after >> __create_pgd_mapping_locked() returns to keep the current callers behavior >> intact. >> >> Suggested-by: Ryan Roberts<ryan.roberts@arm.com> >> Signed-off-by: Yang Shi<yang@os.amperecomputing.com> >> --- >> arch/arm64/mm/mmu.c | 127 ++++++++++++++++++++++++++++++-------------- >> 1 file changed, 86 insertions(+), 41 deletions(-) >> >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c >> index b4df5bc5b1b8..dccf0877285b 100644 >> --- a/arch/arm64/mm/mmu.c >> +++ b/arch/arm64/mm/mmu.c >> @@ -189,11 +189,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, >> } while (ptep++, addr += PAGE_SIZE, addr != end); >> } >> >> -static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, >> - unsigned long end, phys_addr_t phys, >> - pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), >> - int flags) >> +static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, >> + unsigned long end, phys_addr_t phys, >> + pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), >> + int flags) >> { >> unsigned long next; >> pmd_t pmd = READ_ONCE(*pmdp); >> @@ -208,6 +208,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, >> pmdval |= PMD_TABLE_PXN; >> BUG_ON(!pgtable_alloc); >> pte_phys = pgtable_alloc(PAGE_SHIFT); >> + if (!pte_phys) >> + return -ENOMEM; > nit: personally I'd prefer to see a "goto out" and funnel all to a single return > statement. You do that in some functions (via loop break), but would be cleaner > if consistent. 
> > If pgtable_alloc() is modified to return int (see my comment at the bottom), > this becomes: > > ret = pgtable_alloc(PAGE_SHIFT, &pte_phys); > if (ret) > goto out; OK >> ptep = pte_set_fixmap(pte_phys); >> init_clear_pgtable(ptep); >> ptep += pte_index(addr); >> @@ -239,13 +241,16 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, >> * walker. >> */ >> pte_clear_fixmap(); >> + >> + return 0; >> } >> >> -static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, >> - phys_addr_t phys, pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), int flags) >> +static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, >> + phys_addr_t phys, pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), int flags) >> { >> unsigned long next; >> + int ret = 0; >> >> do { >> pmd_t old_pmd = READ_ONCE(*pmdp); >> @@ -264,22 +269,27 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, >> BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), >> READ_ONCE(pmd_val(*pmdp)))); >> } else { >> - alloc_init_cont_pte(pmdp, addr, next, phys, prot, >> + ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot, >> pgtable_alloc, flags); >> + if (ret) >> + break; >> >> BUG_ON(pmd_val(old_pmd) != 0 && >> pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); >> } >> phys += next - addr; >> } while (pmdp++, addr = next, addr != end); >> + >> + return ret; >> } >> >> -static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, >> - unsigned long end, phys_addr_t phys, >> - pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), int flags) >> +static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, >> + unsigned long end, phys_addr_t phys, >> + pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), int flags) >> { >> unsigned long next; >> + int ret = 0; >> pud_t pud = READ_ONCE(*pudp); >> pmd_t *pmdp; >> >> @@ -295,6 +305,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, >> pudval |= PUD_TABLE_PXN; >> 
BUG_ON(!pgtable_alloc); >> pmd_phys = pgtable_alloc(PMD_SHIFT); >> + if (!pmd_phys) >> + return -ENOMEM; >> pmdp = pmd_set_fixmap(pmd_phys); >> init_clear_pgtable(pmdp); >> pmdp += pmd_index(addr); >> @@ -314,21 +326,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, >> (flags & NO_CONT_MAPPINGS) == 0) >> __prot = __pgprot(pgprot_val(prot) | PTE_CONT); >> >> - init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); >> + ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); >> + if (ret) >> + break; >> >> pmdp += pmd_index(next) - pmd_index(addr); >> phys += next - addr; >> } while (addr = next, addr != end); >> >> pmd_clear_fixmap(); >> + >> + return ret; >> } >> >> -static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> - phys_addr_t phys, pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), >> - int flags) >> +static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> + phys_addr_t phys, pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), >> + int flags) >> { >> unsigned long next; >> + int ret = 0; >> p4d_t p4d = READ_ONCE(*p4dp); >> pud_t *pudp; >> >> @@ -340,6 +357,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> p4dval |= P4D_TABLE_PXN; >> BUG_ON(!pgtable_alloc); >> pud_phys = pgtable_alloc(PUD_SHIFT); >> + if (!pud_phys) >> + return -ENOMEM; >> pudp = pud_set_fixmap(pud_phys); >> init_clear_pgtable(pudp); >> pudp += pud_index(addr); >> @@ -369,8 +388,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), >> READ_ONCE(pud_val(*pudp)))); >> } else { >> - alloc_init_cont_pmd(pudp, addr, next, phys, prot, >> + ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot, >> pgtable_alloc, flags); >> + if (ret) >> + break; >> >> BUG_ON(pud_val(old_pud) != 0 && >> pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); >> @@ -379,14 +400,17 @@ static void 
alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, >> } while (pudp++, addr = next, addr != end); >> >> pud_clear_fixmap(); >> + >> + return ret; >> } >> >> -static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, >> - phys_addr_t phys, pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), >> - int flags) >> +static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, >> + phys_addr_t phys, pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), >> + int flags) >> { >> unsigned long next; >> + int ret = 0; >> pgd_t pgd = READ_ONCE(*pgdp); >> p4d_t *p4dp; >> >> @@ -398,6 +422,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, >> pgdval |= PGD_TABLE_PXN; >> BUG_ON(!pgtable_alloc); >> p4d_phys = pgtable_alloc(P4D_SHIFT); >> + if (!p4d_phys) >> + return -ENOMEM; >> p4dp = p4d_set_fixmap(p4d_phys); >> init_clear_pgtable(p4dp); >> p4dp += p4d_index(addr); >> @@ -412,8 +438,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, >> >> next = p4d_addr_end(addr, end); >> >> - alloc_init_pud(p4dp, addr, next, phys, prot, >> + ret = alloc_init_pud(p4dp, addr, next, phys, prot, >> pgtable_alloc, flags); >> + if (ret) >> + break; >> >> BUG_ON(p4d_val(old_p4d) != 0 && >> p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); >> @@ -422,23 +450,26 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, >> } while (p4dp++, addr = next, addr != end); >> >> p4d_clear_fixmap(); >> + >> + return ret; >> } >> >> -static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, >> - unsigned long virt, phys_addr_t size, >> - pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), >> - int flags) >> +static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, >> + unsigned long virt, phys_addr_t size, >> + pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), >> + int flags) >> { >> unsigned long addr, end, next; >> pgd_t *pgdp = 
pgd_offset_pgd(pgdir, virt); >> + int ret = 0; >> >> /* >> * If the virtual and physical address don't have the same offset >> * within a page, we cannot map the region as the caller expects. >> */ >> if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) >> - return; >> + return -EINVAL; >> >> phys &= PAGE_MASK; >> addr = virt & PAGE_MASK; >> @@ -446,29 +477,38 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, >> >> do { >> next = pgd_addr_end(addr, end); >> - alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, >> + ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, >> flags); >> + if (ret) >> + break; >> phys += next - addr; >> } while (pgdp++, addr = next, addr != end); >> + >> + return ret; >> } >> >> -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, >> - unsigned long virt, phys_addr_t size, >> - pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), >> - int flags) >> +static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, >> + unsigned long virt, phys_addr_t size, >> + pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), >> + int flags) >> { >> + int ret; >> + >> mutex_lock(&fixmap_lock); >> - __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, >> + ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, >> pgtable_alloc, flags); >> + BUG_ON(ret); > This function now returns an error, but also BUGs on ret!=0. For this patch, I'd > suggest keeping this function as void. You mean __create_pgd_mapping(), right? > But I believe there is a pre-existing bug in arch_add_memory(). That's called at > runtime so if __create_pgd_mapping() fails and BUGs, it will take down a running > system. Yes, it is the current behavior. > With this foundational patch, we can fix that with an additional patch to pass > along the error code instead of BUGing in that case. 
arch_add_memory() would > need to unwind whatever __create_pgd_mapping() managed to do before the memory > allocation failure (presumably unmapping and freeing any allocated tables). I'm > happy to do this as a follow up patch. Yes, the allocated page tables need to be freed. Thank you for taking it. >> mutex_unlock(&fixmap_lock); >> + >> + return ret; >> } >> >> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 >> extern __alias(__create_pgd_mapping_locked) >> -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, >> - phys_addr_t size, pgprot_t prot, >> - phys_addr_t (*pgtable_alloc)(int), int flags); >> +int create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, >> + phys_addr_t size, pgprot_t prot, >> + phys_addr_t (*pgtable_alloc)(int), int flags); > create_kpti_ng_temp_pgd() now returns error instead of BUGing on allocation > failure, but I don't see a change to handle that error. You'll want to update > __kpti_install_ng_mappings() to BUG on error. Yes, I missed that. It should BUG on error. >> #endif >> >> static phys_addr_t __pgd_pgtable_alloc(int shift) >> @@ -476,13 +516,17 @@ static phys_addr_t __pgd_pgtable_alloc(int shift) >> /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */ >> void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); >> >> - BUG_ON(!ptr); >> + if (!ptr) >> + return 0; > 0 is a valid (though unlikely) physical address. I guess you could technically > encode like ERR_PTR(), but since you are returning phys_addr_t and not a > pointer, then perhaps it will be clearer to make this return int and accept a > pointer to a phys_addr_t, which it will populate on success? Actually I did something similar in the first place, but just returned the virt address. Then did something if it returns NULL. 
That made the code a little more messy since we need convert the virt address to phys address because __create_pgd_mapping() and the helpers require phys address, and changed the functions definition. But I noticed 0 should be not a valid phys address if I remember correctly. I also noticed early_pgtable_alloc() calls memblock_phys_alloc_range(), it returns 0 on failure. If 0 is valid phys address, then it should not do that, right? And I also noticed the memblock range 0 - memstart_addr is actually removed from memblock (see arm64_memblock_init()), so IIUC 0 should be not valid phys address. So the patch ended up being as is. If this assumption doesn't stand, I think your suggestion makes sense. >> + >> return __pa(ptr); >> } >> >> static phys_addr_t pgd_pgtable_alloc(int shift) >> { >> phys_addr_t pa = __pgd_pgtable_alloc(shift); >> + if (!pa) >> + goto out; > This would obviously need to be fixed up as per above. > >> struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); >> >> /* >> @@ -498,6 +542,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) >> else if (shift == PMD_SHIFT) >> BUG_ON(!pagetable_pmd_ctor(ptdesc)); >> >> +out: >> return pa; >> } >> > You have left early_pgtable_alloc() to panic() on allocation failure. Given we > can now unwind the stack with error code, I think it would be more consistent to > also allow early_pgtable_alloc() to return error. The early_pgtable_alloc() is just used for painting linear mapping at early boot stage, if it fails I don't think unwinding the stack is feasible and worth it. Did I miss something? Thanks, Yang > Thanks, > Ryan >
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b4df5bc5b1b8..dccf0877285b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -189,11 +189,11 @@ static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end, } while (ptep++, addr += PAGE_SIZE, addr != end); } -static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { unsigned long next; pmd_t pmd = READ_ONCE(*pmdp); @@ -208,6 +208,8 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); + if (!pte_phys) + return -ENOMEM; ptep = pte_set_fixmap(pte_phys); init_clear_pgtable(ptep); ptep += pte_index(addr); @@ -239,13 +241,16 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, * walker. 
*/ pte_clear_fixmap(); + + return 0; } -static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) +static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; + int ret = 0; do { pmd_t old_pmd = READ_ONCE(*pmdp); @@ -264,22 +269,27 @@ static void init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd), READ_ONCE(pmd_val(*pmdp)))); } else { - alloc_init_cont_pte(pmdp, addr, next, phys, prot, + ret = alloc_init_cont_pte(pmdp, addr, next, phys, prot, pgtable_alloc, flags); + if (ret) + break; BUG_ON(pmd_val(old_pmd) != 0 && pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp))); } phys += next - addr; } while (pmdp++, addr = next, addr != end); + + return ret; } -static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, - unsigned long end, phys_addr_t phys, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags) +static int alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; + int ret = 0; pud_t pud = READ_ONCE(*pudp); pmd_t *pmdp; @@ -295,6 +305,8 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); + if (!pmd_phys) + return -ENOMEM; pmdp = pmd_set_fixmap(pmd_phys); init_clear_pgtable(pmdp); pmdp += pmd_index(addr); @@ -314,21 +326,26 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, (flags & NO_CONT_MAPPINGS) == 0) __prot = __pgprot(pgprot_val(prot) | PTE_CONT); - init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + ret = init_pmd(pmdp, addr, next, phys, __prot, pgtable_alloc, flags); + if (ret) + break; pmdp += pmd_index(next) - 
pmd_index(addr); phys += next - addr; } while (addr = next, addr != end); pmd_clear_fixmap(); + + return ret; } -static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { unsigned long next; + int ret = 0; p4d_t p4d = READ_ONCE(*p4dp); pud_t *pudp; @@ -340,6 +357,8 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); + if (!pud_phys) + return -ENOMEM; pudp = pud_set_fixmap(pud_phys); init_clear_pgtable(pudp); pudp += pud_index(addr); @@ -369,8 +388,10 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, BUG_ON(!pgattr_change_is_safe(pud_val(old_pud), READ_ONCE(pud_val(*pudp)))); } else { - alloc_init_cont_pmd(pudp, addr, next, phys, prot, + ret = alloc_init_cont_pmd(pudp, addr, next, phys, prot, pgtable_alloc, flags); + if (ret) + break; BUG_ON(pud_val(old_pud) != 0 && pud_val(old_pud) != READ_ONCE(pud_val(*pudp))); @@ -379,14 +400,17 @@ static void alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + + return ret; } -static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static int alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { unsigned long next; + int ret = 0; pgd_t pgd = READ_ONCE(*pgdp); p4d_t *p4dp; @@ -398,6 +422,8 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, pgdval |= PGD_TABLE_PXN; BUG_ON(!pgtable_alloc); p4d_phys = 
pgtable_alloc(P4D_SHIFT); + if (!p4d_phys) + return -ENOMEM; p4dp = p4d_set_fixmap(p4d_phys); init_clear_pgtable(p4dp); p4dp += p4d_index(addr); @@ -412,8 +438,10 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, next = p4d_addr_end(addr, end); - alloc_init_pud(p4dp, addr, next, phys, prot, + ret = alloc_init_pud(p4dp, addr, next, phys, prot, pgtable_alloc, flags); + if (ret) + break; BUG_ON(p4d_val(old_p4d) != 0 && p4d_val(old_p4d) != READ_ONCE(p4d_val(*p4dp))); @@ -422,23 +450,26 @@ static void alloc_init_p4d(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (p4dp++, addr = next, addr != end); p4d_clear_fixmap(); + + return ret; } -static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static int __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); + int ret = 0; /* * If the virtual and physical address don't have the same offset * within a page, we cannot map the region as the caller expects. 
*/ if (WARN_ON((phys ^ virt) & ~PAGE_MASK)) - return; + return -EINVAL; phys &= PAGE_MASK; addr = virt & PAGE_MASK; @@ -446,29 +477,38 @@ static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys, do { next = pgd_addr_end(addr, end); - alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, + ret = alloc_init_p4d(pgdp, addr, next, phys, prot, pgtable_alloc, flags); + if (ret) + break; phys += next - addr; } while (pgdp++, addr = next, addr != end); + + return ret; } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +static int __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), + int flags) { + int ret; + mutex_lock(&fixmap_lock); - __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, + ret = __create_pgd_mapping_locked(pgdir, phys, virt, size, prot, pgtable_alloc, flags); + BUG_ON(ret); mutex_unlock(&fixmap_lock); + + return ret; } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 extern __alias(__create_pgd_mapping_locked) -void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), int flags); +int create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); #endif static phys_addr_t __pgd_pgtable_alloc(int shift) @@ -476,13 +516,17 @@ static phys_addr_t __pgd_pgtable_alloc(int shift) /* Page is zeroed by init_clear_pgtable() so don't duplicate effort. 
*/ void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL & ~__GFP_ZERO); - BUG_ON(!ptr); + if (!ptr) + return 0; + return __pa(ptr); } static phys_addr_t pgd_pgtable_alloc(int shift) { phys_addr_t pa = __pgd_pgtable_alloc(shift); + if (!pa) + goto out; struct ptdesc *ptdesc = page_ptdesc(phys_to_page(pa)); /* @@ -498,6 +542,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) else if (shift == PMD_SHIFT) BUG_ON(!pagetable_pmd_ctor(ptdesc)); +out: return pa; }
A later patch will enhance __create_pgd_mapping() and related helpers to split the kernel linear mapping, which requires them to have a return value. So make __create_pgd_mapping() and its helpers non-void functions. And move the BUG_ON() out of the page table alloc helper, since failing to split the kernel linear mapping is not fatal and can be handled by the callers in the later patch. Keep a BUG_ON() after __create_pgd_mapping_locked() returns so the current callers' behavior stays intact. Suggested-by: Ryan Roberts <ryan.roberts@arm.com> Signed-off-by: Yang Shi <yang@os.amperecomputing.com> --- arch/arm64/mm/mmu.c | 127 ++++++++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 41 deletions(-)