Message ID | 5f387ef7b800d5626f20e413090b30ee66d1c8d5.1552142593.git.gary@garyguo.net (mailing list archive) |
---|---|
State | New, archived |
Series | TLB/I$ flush cleanups and improvements |
On 3/9/19 6:52 AM, Gary Guo wrote: > From: Gary Guo <gary@garyguo.net> > > This patch rewrites the logic related to TLB flushing, both to cleanup > the code and to improve performance. > > We now use sfence.vma variant with specified ASID and virtual address > whenever possible. Even though only ASID 0 is used, it still improves > performance by preventing global mappings from being flushed from TLB. > > This patch also includes a IPI-based remote TLB shootdown, which is > useful at this stage for testing because BBL/OpenSBI ignores operands > of sbi_remote_sfence_vma_asid and always perform a global TLB flush. > The SBI-based remote TLB shootdown can still be opt-in using boot > cmdline "tlbi_method=sbi". > Thank you for bringing this up. I have fixed it in OpenSBI. https://github.com/riscv/opensbi/pull/89 I have also tested your patch series in HiFive Unleashed as well. FWIW: Tested-by : Atish Patra <atish.patra@wdc.com> > Signed-off-by: Gary Guo <gary@garyguo.net> > --- > arch/riscv/include/asm/pgtable.h | 2 +- > arch/riscv/include/asm/tlbflush.h | 76 +++++------ > arch/riscv/mm/Makefile | 1 + > arch/riscv/mm/context.c | 8 +- > arch/riscv/mm/init.c | 2 +- > arch/riscv/mm/tlbflush.c | 216 ++++++++++++++++++++++++++++++ > 6 files changed, 257 insertions(+), 48 deletions(-) > create mode 100644 arch/riscv/mm/tlbflush.c > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 1141364d990e..19d1aeb059da 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > * Relying on flush_tlb_fix_spurious_fault would suffice, but > * the extra traps reduce performance. So, eagerly SFENCE.VMA. > */ > - local_flush_tlb_page(address); > + local_flush_tlb_page(vma, address); > } > > #define __HAVE_ARCH_PTE_SAME > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 54fee0cadb1e..8c3086c6084c 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -1,22 +1,14 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > /* > * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com> > * Copyright (C) 2012 Regents of the University of California > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License > - * as published by the Free Software Foundation, version 2. > - * > - * This program is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. > + * Copyright (C) 2019 Gary Guo, University of Cambridge > */ > > #ifndef _ASM_RISCV_TLBFLUSH_H > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > -#include <asm/smp.h> > > /* > * Flush entire local TLB. 
'sfence.vma' implicitly fences with the instruction > @@ -27,53 +19,47 @@ static inline void local_flush_tlb_all(void) > __asm__ __volatile__ ("sfence.vma" : : : "memory"); > } > > -/* Flush one page from local TLB */ > -static inline void local_flush_tlb_page(unsigned long addr) > +static inline void local_flush_tlb_mm(struct mm_struct *mm) > { > - __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"); > + /* Flush ASID 0 so that global mappings are not affected */ > + __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory"); > } > > -#ifndef CONFIG_SMP > - > -#define flush_tlb_all() local_flush_tlb_all() > -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) > - > -static inline void flush_tlb_range(struct vm_area_struct *vma, > - unsigned long start, unsigned long end) > +static inline void local_flush_tlb_page(struct vm_area_struct *vma, > + unsigned long addr) > { > - local_flush_tlb_all(); > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (addr), "r" (0) > + : "memory"); > } > > -#define flush_tlb_mm(mm) flush_tlb_all() > +static inline void local_flush_tlb_kernel_page(unsigned long addr) > +{ > + __asm__ __volatile ("sfence.vma %0" : : "r" (addr) : "memory"); > +} __volatile__ > > -#else /* CONFIG_SMP */ > +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end); > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); > > -#include <asm/sbi.h> > +#ifdef CONFIG_SMP > > -static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > - unsigned long size) > -{ > - struct cpumask hmask; > +void flush_tlb_all(void); > +void flush_tlb_mm(struct mm_struct *mm); > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); > +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end); > +void flush_tlb_kernel_range(unsigned long start, unsigned long end); > > - cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > -} > +#else /* CONFIG_SMP */ > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > -#define flush_tlb_range(vma, start, end) \ > - remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > -#define flush_tlb_mm(mm) \ > - remote_sfence_vma(mm_cpumask(mm), 0, -1) > +#define flush_tlb_all() local_flush_tlb_all() > +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) > +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) > +#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end) > +#define flush_tlb_kernel_range(start, end) \ > + local_flush_tlb_kernel_range(start, end) > > #endif /* CONFIG_SMP */ > > -/* Flush a range of kernel pages */ > -static inline void flush_tlb_kernel_range(unsigned long start, > - unsigned long end) > -{ > - flush_tlb_all(); > -} > - > #endif /* _ASM_RISCV_TLBFLUSH_H */ > diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile > index d75b035786d6..53b68fd3cb45 100644 > --- a/arch/riscv/mm/Makefile > +++ b/arch/riscv/mm/Makefile > @@ -4,3 +4,4 @@ obj-y += extable.o > obj-y += ioremap.o > obj-y += cacheflush.o > obj-y += context.o > +obj-y += tlbflush.o > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > index fbb1cfe80267..0f787bcd3a7a 100644 > --- a/arch/riscv/mm/context.c > +++ b/arch/riscv/mm/context.c > @@ -64,7 +64,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct 
*next, > * privileged ISA 1.10 yet. > */ > csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE); > - local_flush_tlb_all(); > + > + /* > + * sfence.vma after SATP write. We call it on MM context instead of > + * calling local_flush_tlb_all to prevent global mappings from being > + * affected. > + */ > + local_flush_tlb_mm(next); > > flush_icache_deferred(next); > } > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index b379a75ac6a6..858f55e8b219 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -168,7 +168,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) > set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); > } else { > pte_clear(&init_mm, addr, ptep); > - local_flush_tlb_page(addr); > + local_flush_tlb_kernel_page(addr); > } > } > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > new file mode 100644 > index 000000000000..9099dc037cbd > --- /dev/null > +++ b/arch/riscv/mm/tlbflush.c > @@ -0,0 +1,216 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019 Gary Guo, University of Cambridge > + */ > + > +#include <linux/mm.h> > +#include <asm/sbi.h> > + > +#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1) > + > +/* > + * This controls the maximum amount of page-level sfence.vma that the kernel > + * can issue when the kernel needs to flush a range from the TLB. If the size > + * of range goes beyond this threshold, a full sfence.vma is issued. > + * > + * Increase this number can negatively impact performance on implementations > + * where sfence.vma's address operand is ignored and always perform a global > + * TLB flush. On the other hand, implementations with page-level TLB flush > + * support can benefit from a larger number. > + */ > +static unsigned long tlbi_range_threshold = PAGE_SIZE; > + > +static int __init setup_tlbi_max_ops(char *str) > +{ > + int value = 0; > + > + get_option(&str, &value); > + > + /* > + * This value cannot be greater or equal to PTRS_PER_PTE, as we need > + * to full flush for any non-leaf page table change. The value has also > + * be at least 1. > + */ > + if (value >= PTRS_PER_PTE || value < 1) > + return -EINVAL; > + > + tlbi_range_threshold = value * PAGE_SIZE; > + return 0; > +} > +early_param("tlbi_max_ops", setup_tlbi_max_ops); > + Please document it in admin-guide/kernel-parameters.txt as well. > +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + local_flush_tlb_mm(vma->vm_mm); > + return; > + } > + > + while (start < end) { > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (start), "r" (0) > + : "memory"); > + start += PAGE_SIZE; > + } > +} > + > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + local_flush_tlb_all(); > + return; > + } > + > + while (start < end) { > + __asm__ __volatile__ ("sfence.vma %0" > + : : "r" (start) > + : "memory"); > + start += PAGE_SIZE; > + } > +} > + > +#ifdef CONFIG_SMP > + > +/* > + * BBL/OpenSBI are currently ignoring ASID and address range provided > + * by SBI call argument, and do a full TLB flush instead. This may > + * negatively impact performance on implementations with page-level > + * sfence.vma support. > + * > + * We provide an IPI-based remote shootdown implementation to improve > + * performance on implementations with page-level sfence.vma, and also to > + * allow testing of these implementations. 
> + * > + * This parameter allows the approach (IPI/SBI) to be specified using boot > + * cmdline. > + */ > +static bool tlbi_ipi = true; > + Since OpenSBI support has been added and IPI goes through SBI anyways, I think keeping sbi option by default makes more sense to me. > +static int __init setup_tlbi_method(char *str) > +{ > + if (strcmp(str, "ipi") == 0) > + tlbi_ipi = true; > + else if (strcmp(str, "sbi") == 0) > + tlbi_ipi = false; > + else > + return -EINVAL; > + > + return 0; > +} > +early_param("tlbi_method", setup_tlbi_method); Please document it in admin-guide/kernel-parameters.txt as well. > + > + > +struct tlbi { > + unsigned long start; > + unsigned long size; > + unsigned long asid; > +}; > + > +static void ipi_remote_sfence_vma(void *info) > +{ > + struct tlbi *data = info; > + unsigned long start = data->start; > + unsigned long size = data->size; > + unsigned long i; > + > + for (i = 0; i < size; i += PAGE_SIZE) { > + __asm__ __volatile__ ("sfence.vma %0" > + : : "r" (start + i) > + : "memory"); > + } > +} > + > +static void ipi_remote_sfence_vma_asid(void *info) > +{ > + struct tlbi *data = info; > + unsigned long asid = data->asid; > + unsigned long start = data->start; > + unsigned long size = data->size; > + unsigned long i; > + > + /* Flush entire MM context */ > + if (size == SFENCE_VMA_FLUSH_ALL) { > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : : "r" (asid) > + : "memory"); > + return; > + } > + > + for (i = 0; i < size; i += PAGE_SIZE) { > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (start + i), "r" (asid) > + : "memory"); > + } > +} > + > +static void remote_sfence_vma(unsigned long start, unsigned long size) > +{ > + if (tlbi_ipi) { > + struct tlbi info = { > + .start = start, > + .size = size, > + }; > + on_each_cpu(ipi_remote_sfence_vma, &info, 1); > + } else > + sbi_remote_sfence_vma(NULL, start, size); > +} > + > +static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start, > + unsigned long size, unsigned long asid) I think the next line should be aligned with function beginning. > +{ > + if (tlbi_ipi) { > + struct tlbi info = { > + .start = start, > + .size = size, > + .asid = asid, > + }; > + on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1); > + } else { > + cpumask_t hmask; > + > + cpumask_clear(&hmask); > + riscv_cpuid_to_hartid_mask(mask, &hmask); > + sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid); > + } > +} > + > + > +void flush_tlb_all(void) > +{ > + sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL); > +} > + > +void flush_tlb_mm(struct mm_struct *mm) > +{ > + remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0); > +} > + > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > +{ > + remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0); > +} > + > + > +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) same as previous about the next line alignment. > +{ > + if (end - start > tlbi_range_threshold) { > + flush_tlb_mm(vma->vm_mm); > + return; > + } > + > + remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0); > +} > + > +void flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + flush_tlb_all(); > + return; > + } > + > + remote_sfence_vma(start, end - start); > +} > + > +#endif /* CONFIG_SMP */ >
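For reference, the two boot parameters added by this patch would end up in Documentation/admin-guide/kernel-parameters.txt as entries roughly like the sketch below. The wording is only illustrative; the value ranges and defaults are taken from the code in this patch (threshold defaults to one page, method defaults to IPI):

    tlbi_max_ops=   [RISCV]
                    Maximum number of page-level sfence.vma instructions
                    issued for a ranged TLB flush before the kernel falls
                    back to flushing the whole context instead. Must be at
                    least 1 and less than PTRS_PER_PTE. Default: 1.

    tlbi_method=    [RISCV]
                    Select how remote TLB shootdowns are performed.
                    Format: { "ipi" | "sbi" }
                    Default: ipi.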
On Sat, Mar 09, 2019 at 02:52:04PM +0000, Gary Guo wrote: > This patch also includes a IPI-based remote TLB shootdown, which is > useful at this stage for testing because BBL/OpenSBI ignores operands > of sbi_remote_sfence_vma_asid and always perform a global TLB flush. > The SBI-based remote TLB shootdown can still be opt-in using boot > cmdline "tlbi_method=sbi". I think that part should be split into a separate patch, as it is a logically separate change.
On Sat, 09 Mar 2019 06:52:04 PST (-0800), gary@garyguo.net wrote: > From: Gary Guo <gary@garyguo.net> > > This patch rewrites the logic related to TLB flushing, both to cleanup > the code and to improve performance. > > We now use sfence.vma variant with specified ASID and virtual address > whenever possible. Even though only ASID 0 is used, it still improves > performance by preventing global mappings from being flushed from TLB. > > This patch also includes a IPI-based remote TLB shootdown, which is > useful at this stage for testing because BBL/OpenSBI ignores operands > of sbi_remote_sfence_vma_asid and always perform a global TLB flush. > The SBI-based remote TLB shootdown can still be opt-in using boot > cmdline "tlbi_method=sbi". Thanks for doing so much work here, but I don't think this actually the right way to go. I'd rather fix the firmware than put all this code in the kernel, particularly as we're putting this in the firmware under an assumption that there will be implementation-specific mechanism in the future. > Signed-off-by: Gary Guo <gary@garyguo.net> > --- > arch/riscv/include/asm/pgtable.h | 2 +- > arch/riscv/include/asm/tlbflush.h | 76 +++++------ > arch/riscv/mm/Makefile | 1 + > arch/riscv/mm/context.c | 8 +- > arch/riscv/mm/init.c | 2 +- > arch/riscv/mm/tlbflush.c | 216 ++++++++++++++++++++++++++++++ > 6 files changed, 257 insertions(+), 48 deletions(-) > create mode 100644 arch/riscv/mm/tlbflush.c > > diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h > index 1141364d990e..19d1aeb059da 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > * Relying on flush_tlb_fix_spurious_fault would suffice, but > * the extra traps reduce performance. So, eagerly SFENCE.VMA. > */ > - local_flush_tlb_page(address); > + local_flush_tlb_page(vma, address); > } > > #define __HAVE_ARCH_PTE_SAME > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 54fee0cadb1e..8c3086c6084c 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -1,22 +1,14 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > /* > * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com> > * Copyright (C) 2012 Regents of the University of California > - * > - * This program is free software; you can redistribute it and/or > - * modify it under the terms of the GNU General Public License > - * as published by the Free Software Foundation, version 2. > - * > - * This program is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. > + * Copyright (C) 2019 Gary Guo, University of Cambridge > */ > > #ifndef _ASM_RISCV_TLBFLUSH_H > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > -#include <asm/smp.h> > > /* > * Flush entire local TLB. 
'sfence.vma' implicitly fences with the instruction > @@ -27,53 +19,47 @@ static inline void local_flush_tlb_all(void) > __asm__ __volatile__ ("sfence.vma" : : : "memory"); > } > > -/* Flush one page from local TLB */ > -static inline void local_flush_tlb_page(unsigned long addr) > +static inline void local_flush_tlb_mm(struct mm_struct *mm) > { > - __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"); > + /* Flush ASID 0 so that global mappings are not affected */ > + __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory"); > } > > -#ifndef CONFIG_SMP > - > -#define flush_tlb_all() local_flush_tlb_all() > -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) > - > -static inline void flush_tlb_range(struct vm_area_struct *vma, > - unsigned long start, unsigned long end) > +static inline void local_flush_tlb_page(struct vm_area_struct *vma, > + unsigned long addr) > { > - local_flush_tlb_all(); > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (addr), "r" (0) > + : "memory"); > } > > -#define flush_tlb_mm(mm) flush_tlb_all() > +static inline void local_flush_tlb_kernel_page(unsigned long addr) > +{ > + __asm__ __volatile ("sfence.vma %0" : : "r" (addr) : "memory"); > +} > > -#else /* CONFIG_SMP */ > +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end); > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); > > -#include <asm/sbi.h> > +#ifdef CONFIG_SMP > > -static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > - unsigned long size) > -{ > - struct cpumask hmask; > +void flush_tlb_all(void); > +void flush_tlb_mm(struct mm_struct *mm); > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); > +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end); > +void flush_tlb_kernel_range(unsigned long start, unsigned long end); > > - cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > -} > +#else /* CONFIG_SMP */ > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > -#define flush_tlb_range(vma, start, end) \ > - remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > -#define flush_tlb_mm(mm) \ > - remote_sfence_vma(mm_cpumask(mm), 0, -1) > +#define flush_tlb_all() local_flush_tlb_all() > +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) > +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) > +#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end) > +#define flush_tlb_kernel_range(start, end) \ > + local_flush_tlb_kernel_range(start, end) > > #endif /* CONFIG_SMP */ > > -/* Flush a range of kernel pages */ > -static inline void flush_tlb_kernel_range(unsigned long start, > - unsigned long end) > -{ > - flush_tlb_all(); > -} > - > #endif /* _ASM_RISCV_TLBFLUSH_H */ > diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile > index d75b035786d6..53b68fd3cb45 100644 > --- a/arch/riscv/mm/Makefile > +++ b/arch/riscv/mm/Makefile > @@ -4,3 +4,4 @@ obj-y += extable.o > obj-y += ioremap.o > obj-y += cacheflush.o > obj-y += context.o > +obj-y += tlbflush.o > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > index fbb1cfe80267..0f787bcd3a7a 100644 > --- a/arch/riscv/mm/context.c > +++ b/arch/riscv/mm/context.c > @@ -64,7 +64,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, > * 
privileged ISA 1.10 yet. > */ > csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE); > - local_flush_tlb_all(); > + > + /* > + * sfence.vma after SATP write. We call it on MM context instead of > + * calling local_flush_tlb_all to prevent global mappings from being > + * affected. > + */ > + local_flush_tlb_mm(next); > > flush_icache_deferred(next); > } > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index b379a75ac6a6..858f55e8b219 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -168,7 +168,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) > set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); > } else { > pte_clear(&init_mm, addr, ptep); > - local_flush_tlb_page(addr); > + local_flush_tlb_kernel_page(addr); > } > } > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > new file mode 100644 > index 000000000000..9099dc037cbd > --- /dev/null > +++ b/arch/riscv/mm/tlbflush.c > @@ -0,0 +1,216 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2019 Gary Guo, University of Cambridge > + */ > + > +#include <linux/mm.h> > +#include <asm/sbi.h> > + > +#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1) > + > +/* > + * This controls the maximum amount of page-level sfence.vma that the kernel > + * can issue when the kernel needs to flush a range from the TLB. If the size > + * of range goes beyond this threshold, a full sfence.vma is issued. > + * > + * Increase this number can negatively impact performance on implementations > + * where sfence.vma's address operand is ignored and always perform a global > + * TLB flush. On the other hand, implementations with page-level TLB flush > + * support can benefit from a larger number. > + */ > +static unsigned long tlbi_range_threshold = PAGE_SIZE; > + > +static int __init setup_tlbi_max_ops(char *str) > +{ > + int value = 0; > + > + get_option(&str, &value); > + > + /* > + * This value cannot be greater or equal to PTRS_PER_PTE, as we need > + * to full flush for any non-leaf page table change. The value has also > + * be at least 1. > + */ > + if (value >= PTRS_PER_PTE || value < 1) > + return -EINVAL; > + > + tlbi_range_threshold = value * PAGE_SIZE; > + return 0; > +} > +early_param("tlbi_max_ops", setup_tlbi_max_ops); > + > +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + local_flush_tlb_mm(vma->vm_mm); > + return; > + } > + > + while (start < end) { > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (start), "r" (0) > + : "memory"); > + start += PAGE_SIZE; > + } > +} > + > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + local_flush_tlb_all(); > + return; > + } > + > + while (start < end) { > + __asm__ __volatile__ ("sfence.vma %0" > + : : "r" (start) > + : "memory"); > + start += PAGE_SIZE; > + } > +} > + > +#ifdef CONFIG_SMP > + > +/* > + * BBL/OpenSBI are currently ignoring ASID and address range provided > + * by SBI call argument, and do a full TLB flush instead. This may > + * negatively impact performance on implementations with page-level > + * sfence.vma support. > + * > + * We provide an IPI-based remote shootdown implementation to improve > + * performance on implementations with page-level sfence.vma, and also to > + * allow testing of these implementations. 
> + * > + * This parameter allows the approach (IPI/SBI) to be specified using boot > + * cmdline. > + */ > +static bool tlbi_ipi = true; > + > +static int __init setup_tlbi_method(char *str) > +{ > + if (strcmp(str, "ipi") == 0) > + tlbi_ipi = true; > + else if (strcmp(str, "sbi") == 0) > + tlbi_ipi = false; > + else > + return -EINVAL; > + > + return 0; > +} > +early_param("tlbi_method", setup_tlbi_method); > + > + > +struct tlbi { > + unsigned long start; > + unsigned long size; > + unsigned long asid; > +}; > + > +static void ipi_remote_sfence_vma(void *info) > +{ > + struct tlbi *data = info; > + unsigned long start = data->start; > + unsigned long size = data->size; > + unsigned long i; > + > + for (i = 0; i < size; i += PAGE_SIZE) { > + __asm__ __volatile__ ("sfence.vma %0" > + : : "r" (start + i) > + : "memory"); > + } > +} > + > +static void ipi_remote_sfence_vma_asid(void *info) > +{ > + struct tlbi *data = info; > + unsigned long asid = data->asid; > + unsigned long start = data->start; > + unsigned long size = data->size; > + unsigned long i; > + > + /* Flush entire MM context */ > + if (size == SFENCE_VMA_FLUSH_ALL) { > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : : "r" (asid) > + : "memory"); > + return; > + } > + > + for (i = 0; i < size; i += PAGE_SIZE) { > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : : "r" (start + i), "r" (asid) > + : "memory"); > + } > +} > + > +static void remote_sfence_vma(unsigned long start, unsigned long size) > +{ > + if (tlbi_ipi) { > + struct tlbi info = { > + .start = start, > + .size = size, > + }; > + on_each_cpu(ipi_remote_sfence_vma, &info, 1); > + } else > + sbi_remote_sfence_vma(NULL, start, size); > +} > + > +static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start, > + unsigned long size, unsigned long asid) > +{ > + if (tlbi_ipi) { > + struct tlbi info = { > + .start = start, > + .size = size, > + .asid = asid, > + }; > + on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1); > + } else { > + cpumask_t hmask; > + > + cpumask_clear(&hmask); > + riscv_cpuid_to_hartid_mask(mask, &hmask); > + sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid); > + } > +} > + > + > +void flush_tlb_all(void) > +{ > + sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL); > +} > + > +void flush_tlb_mm(struct mm_struct *mm) > +{ > + remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0); > +} > + > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > +{ > + remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0); > +} > + > + > +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + flush_tlb_mm(vma->vm_mm); > + return; > + } > + > + remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0); > +} > + > +void flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + if (end - start > tlbi_range_threshold) { > + flush_tlb_all(); > + return; > + } > + > + remote_sfence_vma(start, end - start); > +} > + > +#endif /* CONFIG_SMP */ > -- > 2.17.1
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 1141364d990e..19d1aeb059da 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. */ - local_flush_tlb_page(address); + local_flush_tlb_page(vma, address); } #define __HAVE_ARCH_PTE_SAME diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 54fee0cadb1e..8c3086c6084c 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -1,22 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com> * Copyright (C) 2012 Regents of the University of California - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Copyright (C) 2019 Gary Guo, University of Cambridge */ #ifndef _ASM_RISCV_TLBFLUSH_H #define _ASM_RISCV_TLBFLUSH_H #include <linux/mm_types.h> -#include <asm/smp.h> /* * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction @@ -27,53 +19,47 @@ static inline void local_flush_tlb_all(void) __asm__ __volatile__ ("sfence.vma" : : : "memory"); } -/* Flush one page from local TLB */ -static inline void local_flush_tlb_page(unsigned long addr) +static inline void local_flush_tlb_mm(struct mm_struct *mm) { - __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"); + /* Flush ASID 0 so that global mappings are not affected */ + __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory"); } -#ifndef CONFIG_SMP - -#define flush_tlb_all() local_flush_tlb_all() -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void local_flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) { - local_flush_tlb_all(); + __asm__ __volatile__ ("sfence.vma %0, %1" + : : "r" (addr), "r" (0) + : "memory"); } -#define flush_tlb_mm(mm) flush_tlb_all() +static inline void local_flush_tlb_kernel_page(unsigned long addr) +{ + __asm__ __volatile ("sfence.vma %0" : : "r" (addr) : "memory"); +} -#else /* CONFIG_SMP */ +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); -#include <asm/sbi.h> +#ifdef CONFIG_SMP -static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, - unsigned long size) -{ - struct cpumask hmask; +void flush_tlb_all(void); +void flush_tlb_mm(struct mm_struct *mm); +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void flush_tlb_kernel_range(unsigned long start, unsigned long end); - cpumask_clear(&hmask); - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(hmask.bits, start, size); -} +#else /* CONFIG_SMP */ -#define flush_tlb_all() 
sbi_remote_sfence_vma(NULL, 0, -1) -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) -#define flush_tlb_range(vma, start, end) \ - remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) -#define flush_tlb_mm(mm) \ - remote_sfence_vma(mm_cpumask(mm), 0, -1) +#define flush_tlb_all() local_flush_tlb_all() +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm) +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr) +#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end) +#define flush_tlb_kernel_range(start, end) \ + local_flush_tlb_kernel_range(start, end) #endif /* CONFIG_SMP */ -/* Flush a range of kernel pages */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d75b035786d6..53b68fd3cb45 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -4,3 +4,4 @@ obj-y += extable.o obj-y += ioremap.o obj-y += cacheflush.o obj-y += context.o +obj-y += tlbflush.o diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index fbb1cfe80267..0f787bcd3a7a 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -64,7 +64,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, * privileged ISA 1.10 yet. */ csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE); - local_flush_tlb_all(); + + /* + * sfence.vma after SATP write. We call it on MM context instead of + * calling local_flush_tlb_all to prevent global mappings from being + * affected. + */ + local_flush_tlb_mm(next); flush_icache_deferred(next); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b379a75ac6a6..858f55e8b219 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -168,7 +168,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); } else { pte_clear(&init_mm, addr, ptep); - local_flush_tlb_page(addr); + local_flush_tlb_kernel_page(addr); } } diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c new file mode 100644 index 000000000000..9099dc037cbd --- /dev/null +++ b/arch/riscv/mm/tlbflush.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Gary Guo, University of Cambridge + */ + +#include <linux/mm.h> +#include <asm/sbi.h> + +#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1) + +/* + * This controls the maximum amount of page-level sfence.vma that the kernel + * can issue when the kernel needs to flush a range from the TLB. If the size + * of range goes beyond this threshold, a full sfence.vma is issued. + * + * Increase this number can negatively impact performance on implementations + * where sfence.vma's address operand is ignored and always perform a global + * TLB flush. On the other hand, implementations with page-level TLB flush + * support can benefit from a larger number. + */ +static unsigned long tlbi_range_threshold = PAGE_SIZE; + +static int __init setup_tlbi_max_ops(char *str) +{ + int value = 0; + + get_option(&str, &value); + + /* + * This value cannot be greater or equal to PTRS_PER_PTE, as we need + * to full flush for any non-leaf page table change. The value has also + * be at least 1. 
+ */ + if (value >= PTRS_PER_PTE || value < 1) + return -EINVAL; + + tlbi_range_threshold = value * PAGE_SIZE; + return 0; +} +early_param("tlbi_max_ops", setup_tlbi_max_ops); + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + if (end - start > tlbi_range_threshold) { + local_flush_tlb_mm(vma->vm_mm); + return; + } + + while (start < end) { + __asm__ __volatile__ ("sfence.vma %0, %1" + : : "r" (start), "r" (0) + : "memory"); + start += PAGE_SIZE; + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (end - start > tlbi_range_threshold) { + local_flush_tlb_all(); + return; + } + + while (start < end) { + __asm__ __volatile__ ("sfence.vma %0" + : : "r" (start) + : "memory"); + start += PAGE_SIZE; + } +} + +#ifdef CONFIG_SMP + +/* + * BBL/OpenSBI are currently ignoring ASID and address range provided + * by SBI call argument, and do a full TLB flush instead. This may + * negatively impact performance on implementations with page-level + * sfence.vma support. + * + * We provide an IPI-based remote shootdown implementation to improve + * performance on implementations with page-level sfence.vma, and also to + * allow testing of these implementations. + * + * This parameter allows the approach (IPI/SBI) to be specified using boot + * cmdline. + */ +static bool tlbi_ipi = true; + +static int __init setup_tlbi_method(char *str) +{ + if (strcmp(str, "ipi") == 0) + tlbi_ipi = true; + else if (strcmp(str, "sbi") == 0) + tlbi_ipi = false; + else + return -EINVAL; + + return 0; +} +early_param("tlbi_method", setup_tlbi_method); + + +struct tlbi { + unsigned long start; + unsigned long size; + unsigned long asid; +}; + +static void ipi_remote_sfence_vma(void *info) +{ + struct tlbi *data = info; + unsigned long start = data->start; + unsigned long size = data->size; + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + __asm__ __volatile__ ("sfence.vma %0" + : : "r" (start + i) + : "memory"); + } +} + +static void ipi_remote_sfence_vma_asid(void *info) +{ + struct tlbi *data = info; + unsigned long asid = data->asid; + unsigned long start = data->start; + unsigned long size = data->size; + unsigned long i; + + /* Flush entire MM context */ + if (size == SFENCE_VMA_FLUSH_ALL) { + __asm__ __volatile__ ("sfence.vma x0, %0" + : : "r" (asid) + : "memory"); + return; + } + + for (i = 0; i < size; i += PAGE_SIZE) { + __asm__ __volatile__ ("sfence.vma %0, %1" + : : "r" (start + i), "r" (asid) + : "memory"); + } +} + +static void remote_sfence_vma(unsigned long start, unsigned long size) +{ + if (tlbi_ipi) { + struct tlbi info = { + .start = start, + .size = size, + }; + on_each_cpu(ipi_remote_sfence_vma, &info, 1); + } else + sbi_remote_sfence_vma(NULL, start, size); +} + +static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start, + unsigned long size, unsigned long asid) +{ + if (tlbi_ipi) { + struct tlbi info = { + .start = start, + .size = size, + .asid = asid, + }; + on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1); + } else { + cpumask_t hmask; + + cpumask_clear(&hmask); + riscv_cpuid_to_hartid_mask(mask, &hmask); + sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid); + } +} + + +void flush_tlb_all(void) +{ + sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + 
remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0); +} + + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + if (end - start > tlbi_range_threshold) { + flush_tlb_mm(vma->vm_mm); + return; + } + + remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (end - start > tlbi_range_threshold) { + flush_tlb_all(); + return; + } + + remote_sfence_vma(start, end - start); +} + +#endif /* CONFIG_SMP */
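As a usage illustration (the console= and root= values below are placeholders, not taken from this thread), the SBI-based shootdown and a larger per-range limit would be selected on the kernel command line as follows. With tlbi_max_ops=16, ranged flushes covering up to 16 pages are performed with page-level sfence.vma, and anything larger falls back to a full flush of the context:

    console=ttySIF0 root=/dev/mmcblk0p2 rootwait tlbi_method=sbi tlbi_max_ops=16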