Message ID | 20240806022114.3320543-4-yuzhao@google.com
---|---
State | New
Series | mm/arm64: re-enable HVO
On Mon, Aug 5, 2024 at 8:21 PM Yu Zhao <yuzhao@google.com> wrote:
>
> Pause remote CPUs so that the local CPU can follow the proper BBM
> sequence to safely update the vmemmap mapping `struct page` areas.
>
> While updating the vmemmap, it is guaranteed that neither the local
> CPU nor the remote ones will access the `struct page` area being
> updated, and therefore they will not trigger kernel PFs.
>
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> ---
>  arch/arm64/include/asm/pgalloc.h | 55 ++++++++++++++++++++++++++++++++
>  mm/hugetlb_vmemmap.c             | 14 ++++++++
>  2 files changed, 69 insertions(+)
>
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 8ff5f2a2579e..1af1aa34a351 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -12,6 +12,7 @@
>  #include <asm/processor.h>
>  #include <asm/cacheflush.h>
>  #include <asm/tlbflush.h>
> +#include <asm/cpu.h>
>
>  #define __HAVE_ARCH_PGD_FREE
>  #define __HAVE_ARCH_PUD_FREE
> @@ -137,4 +138,58 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
>  	__pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN);
>  }
>
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +
> +#define vmemmap_update_lock vmemmap_update_lock
> +static inline void vmemmap_update_lock(void)
> +{
> +	cpus_read_lock();
> +}
> +
> +#define vmemmap_update_unlock vmemmap_update_unlock
> +static inline void vmemmap_update_unlock(void)
> +{
> +	cpus_read_unlock();
> +}
> +
> +#define vmemmap_update_pte vmemmap_update_pte
> +static inline void vmemmap_update_pte(unsigned long addr, pte_t *ptep, pte_t pte)
> +{
> +	preempt_disable();
> +	pause_remote_cpus();
> +
> +	pte_clear(&init_mm, addr, ptep);
> +	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> +	set_pte_at(&init_mm, addr, ptep, pte);
> +
> +	resume_remote_cpus();
> +	preempt_enable();
> +}

Note that I kept this API from Nanyong for the sake of discussion.
What I actually plan to test in our production is:

#define vmemmap_update_pte_range_start vmemmap_update_pte_range_start
static inline void vmemmap_update_pte_range_start(pte_t *pte,
						  unsigned long start,
						  unsigned long end)
{
	unsigned long addr;

	preempt_disable();
	pause_remote_cpus();

	for (addr = start; addr != end; addr += PAGE_SIZE, pte++)
		pte_clear(&init_mm, addr, pte);

	flush_tlb_kernel_range(start, end);
}

#define vmemmap_update_pte_range_end vmemmap_update_pte_range_end
static inline void vmemmap_update_pte_range_end(void)
{
	resume_remote_cpus();
	preempt_enable();
}

> +#define vmemmap_update_pmd vmemmap_update_pmd
> +static inline void vmemmap_update_pmd(unsigned long addr, pmd_t *pmdp, pte_t *ptep)
> +{
> +	preempt_disable();
> +	pause_remote_cpus();
> +
> +	pmd_clear(pmdp);
> +	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> +	pmd_populate_kernel(&init_mm, pmdp, ptep);
> +
> +	resume_remote_cpus();
> +	preempt_enable();
> +}
> +
> +#define vmemmap_flush_tlb_all vmemmap_flush_tlb_all
> +static inline void vmemmap_flush_tlb_all(void)
> +{
> +}
> +
> +#define vmemmap_flush_tlb_range vmemmap_flush_tlb_range
> +static inline void vmemmap_flush_tlb_range(unsigned long start, unsigned long end)
> +{
> +}
> +
> +#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */
> +
>  #endif
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 2dd92e58f304..893c73493d9c 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -46,6 +46,18 @@ struct vmemmap_remap_walk {
>  	unsigned long flags;
>  };
>
> +#ifndef vmemmap_update_lock
> +static void vmemmap_update_lock(void)
> +{
> +}
> +#endif
> +
> +#ifndef vmemmap_update_unlock
> +static void vmemmap_update_unlock(void)
> +{
> +}
> +#endif
> +
>  #ifndef vmemmap_update_pmd
>  static inline void vmemmap_update_pmd(unsigned long addr,
>  				      pmd_t *pmdp, pte_t *ptep)
> @@ -194,10 +206,12 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
>
>  	VM_BUG_ON(!PAGE_ALIGNED(start | end));
>
> +	vmemmap_update_lock();
>  	mmap_read_lock(&init_mm);
>  	ret = walk_page_range_novma(&init_mm, start, end, &vmemmap_remap_ops,
>  				    NULL, walk);
>  	mmap_read_unlock(&init_mm);
> +	vmemmap_update_unlock();
>  	if (ret)
>  		return ret;
>
> --
> 2.46.0.rc2.264.g509ed76dc8-goog
>
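For illustration only, here is a minimal sketch of how the range-based helpers above could bracket a PTE rewrite on the mm/hugetlb_vmemmap.c side, so remote CPUs are paused once per vmemmap range rather than once per PTE. The function name vmemmap_remap_pte_range(), the use of walk->reuse_page, and the PAGE_KERNEL_RO protection are assumptions made for this sketch, not part of the patch:

/*
 * Hypothetical caller (not from this series): rewrite a range of vmemmap
 * PTEs under a single pause/resume of the remote CPUs.
 */
static void vmemmap_remap_pte_range(pte_t *pte, unsigned long start,
				    unsigned long end,
				    struct vmemmap_remap_walk *walk)
{
	unsigned long addr;

	/* BBM: clear the old entries and flush the stale TLB entries once. */
	vmemmap_update_pte_range_start(pte, start, end);

	/* Install the new entries, e.g. all pointing at the reuse page. */
	for (addr = start; addr != end; addr += PAGE_SIZE, pte++)
		set_pte_at(&init_mm, addr, pte,
			   mk_pte(walk->reuse_page, PAGE_KERNEL_RO));

	/* Resume remote CPUs and re-enable preemption. */
	vmemmap_update_pte_range_end();
}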
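pause_remote_cpus() and resume_remote_cpus() are introduced earlier in this series and are not shown in this patch. Purely as a sketch of the concept, assuming an IPI-based rendezvous (the names and details below are illustrative, not the series' actual implementation), each remote CPU is sent an IPI and spins in the handler until the updater releases it:

static atomic_t nr_paused = ATOMIC_INIT(0);
static bool do_resume;

/* Runs on every remote CPU: check in, then spin until released. */
static void remote_pause_fn(void *unused)
{
	atomic_inc(&nr_paused);
	while (!READ_ONCE(do_resume))
		cpu_relax();
	atomic_dec(&nr_paused);
}

/* Caller holds cpus_read_lock() and has preemption disabled. */
void pause_remote_cpus(void)
{
	WRITE_ONCE(do_resume, false);
	/* Fire-and-forget IPIs; the handlers spin until resumed. */
	smp_call_function(remote_pause_fn, NULL, 0);
	/* Wait for every other online CPU to check in. */
	while (atomic_read(&nr_paused) != num_online_cpus() - 1)
		cpu_relax();
}

void resume_remote_cpus(void)
{
	WRITE_ONCE(do_resume, true);
	while (atomic_read(&nr_paused))
		cpu_relax();
}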