diff mbox series

[v2,4/4] riscv: rewrite tlb flush for performance

Message ID 33588efb3909a4d699a952f93c26ea2f3c8bfdc4.1552069700.git.gary@garyguo.net (mailing list archive)
State New, archived
Headers show
Series Improvements related to TLB and I$ flush | expand

Commit Message

Gary Guo March 8, 2019, 6:40 p.m. UTC
From: Gary Guo <gary@garyguo.net>

This patch rewrites the logic related to TLB flushing, both to clean up
the code and to improve performance.

We now use the sfence.vma variant with a specified ASID and virtual address
whenever possible.  Even though only ASID 0 is used, it still improves
performance by preventing global mappings from being flushed from the TLB.

This patch also includes an IPI-based remote TLB shootdown, which is
useful at this stage for testing because BBL/OpenSBI ignores the operands
of sbi_remote_sfence_vma_asid and always performs a global TLB flush.
The SBI-based remote TLB shootdown can still be opted into using the boot
cmdline "tlbi_method=sbi".

Signed-off-by: Gary Guo <gary@garyguo.net>
---
 arch/riscv/include/asm/pgtable.h  |   2 +-
 arch/riscv/include/asm/tlbflush.h |  73 ++++------
 arch/riscv/mm/Makefile            |   1 +
 arch/riscv/mm/context.c           |   8 +-
 arch/riscv/mm/tlbflush.c          | 216 ++++++++++++++++++++++++++++++
 5 files changed, 252 insertions(+), 48 deletions(-)
 create mode 100644 arch/riscv/mm/tlbflush.c

Comments

Gary Guo March 9, 2019, 2:22 a.m. UTC | #1
Just realised that __setup has been deprecated in favour of early_param.
I'll incorporate it in v3 of the patch.

> +
> +static int __init setup_tlbi_max_ops(char *str) {
> +	int value = 0;
> +
> +	get_option(&str, &value);
> +
> +	/*
> +	 * This value cannot be greater or equal to PTRS_PER_PTE, as we need
> +	 * to full flush for any non-leaf page table change. The value has also
> +	 * be at least 1.
> +	 */
> +	if (value >= PTRS_PER_PTE || value < 1)
> +		return 0;
> +
> +	tlbi_range_threshold = value * PAGE_SIZE;
> +	return 1;
> +}
> +__setup("tlbi_max_ops=", setup_tlbi_max_ops);
> +
Atish Patra March 9, 2019, 8:17 a.m. UTC | #2
On 3/8/19 10:40 AM, Gary Guo wrote:
> From: Gary Guo <gary@garyguo.net>
> 
> This patch rewrites the logic related to TLB flushing, both to cleanup
> the code and to improve performance.
> 
> We now use sfence.vma variant with specified ASID and virtual address
> whenever possible.  Even though only ASID 0 is used, it still improves
> performance by preventing global mappings from being flushed from TLB.
> 
> This patch also includes a IPI-based remote TLB shootdown, which is
> useful at this stage for testing because BBL/OpenSBI ignores operands
> of sbi_remote_sfence_vma_asid and always perform a global TLB flush.
> The SBI-based remote TLB shootdown can still be opt-in using boot
> cmdline "tlbi_method=sbi".
> 
> Signed-off-by: Gary Guo <gary@garyguo.net>
> ---
>   arch/riscv/include/asm/pgtable.h  |   2 +-
>   arch/riscv/include/asm/tlbflush.h |  73 ++++------
>   arch/riscv/mm/Makefile            |   1 +
>   arch/riscv/mm/context.c           |   8 +-
>   arch/riscv/mm/tlbflush.c          | 216 ++++++++++++++++++++++++++++++
>   5 files changed, 252 insertions(+), 48 deletions(-)
>   create mode 100644 arch/riscv/mm/tlbflush.c
> 
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 16301966d65b..47a8616b9de0 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
>   	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
>   	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
>   	 */
> -	local_flush_tlb_page(address);
> +	local_flush_tlb_page(vma, address);
>   }
>   
>   #define __HAVE_ARCH_PTE_SAME
> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index 54fee0cadb1e..d6c247ce17f3 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -1,22 +1,14 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
>   /*
>    * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
>    * Copyright (C) 2012 Regents of the University of California
> - *
> - *   This program is free software; you can redistribute it and/or
> - *   modify it under the terms of the GNU General Public License
> - *   as published by the Free Software Foundation, version 2.
> - *
> - *   This program is distributed in the hope that it will be useful,
> - *   but WITHOUT ANY WARRANTY; without even the implied warranty of
> - *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> - *   GNU General Public License for more details.
> + * Copyright (C) 2019 Gary Guo, University of Cambridge
>    */
>   
>   #ifndef _ASM_RISCV_TLBFLUSH_H
>   #define _ASM_RISCV_TLBFLUSH_H
>   
>   #include <linux/mm_types.h>
> -#include <asm/smp.h>
>   
>   /*
>    * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
> @@ -27,53 +19,42 @@ static inline void local_flush_tlb_all(void)
>   	__asm__ __volatile__ ("sfence.vma" : : : "memory");
>   }
>   
> -/* Flush one page from local TLB */
> -static inline void local_flush_tlb_page(unsigned long addr)
> +static inline void local_flush_tlb_mm(struct mm_struct *mm)
>   {
> -	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
> +	/* Flush ASID 0 so that global mappings are not affected */
> +	__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory");
>   }
>   
> -#ifndef CONFIG_SMP
> -
> -#define flush_tlb_all() local_flush_tlb_all()
> -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
> -
> -static inline void flush_tlb_range(struct vm_area_struct *vma,
> -		unsigned long start, unsigned long end)
> +static inline void local_flush_tlb_page(struct vm_area_struct *vma,
> +	unsigned long addr)
>   {
> -	local_flush_tlb_all();
> +	__asm__ __volatile__ ("sfence.vma %0, %1"
> +			      : : "r" (addr), "r" (0)
> +			      : "memory");
>   }
>   
> -#define flush_tlb_mm(mm) flush_tlb_all()
> +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +	unsigned long end);
> +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
>   
> -#else /* CONFIG_SMP */
> -
> -#include <asm/sbi.h>
> +#ifdef CONFIG_SMP
>   
> -static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
> -				     unsigned long size)
> -{
> -	struct cpumask hmask;
> +void flush_tlb_all(void);
> +void flush_tlb_mm(struct mm_struct *mm);
> +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
> +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +	unsigned long end);
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>   
> -	cpumask_clear(&hmask);
> -	riscv_cpuid_to_hartid_mask(cmask, &hmask);
> -	sbi_remote_sfence_vma(hmask.bits, start, size);
> -}
> +#else /* CONFIG_SMP */
>   
> -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
> -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
> -#define flush_tlb_range(vma, start, end) \
> -	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
> -#define flush_tlb_mm(mm) \
> -	remote_sfence_vma(mm_cpumask(mm), 0, -1)
> +#define flush_tlb_all() local_flush_tlb_all()
> +#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
> +#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
> +#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end)
> +#define flush_tlb_kernel_range(start, end) \
> +	local_flush_tlb_kernel_range(start, end)
>   
>   #endif /* CONFIG_SMP */
>   
> -/* Flush a range of kernel pages */
> -static inline void flush_tlb_kernel_range(unsigned long start,
> -	unsigned long end)
> -{
> -	flush_tlb_all();
> -}
> -
>   #endif /* _ASM_RISCV_TLBFLUSH_H */
> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
> index d75b035786d6..53b68fd3cb45 100644
> --- a/arch/riscv/mm/Makefile
> +++ b/arch/riscv/mm/Makefile
> @@ -4,3 +4,4 @@ obj-y += extable.o
>   obj-y += ioremap.o
>   obj-y += cacheflush.o
>   obj-y += context.o
> +obj-y += tlbflush.o
> diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
> index fbb1cfe80267..0f787bcd3a7a 100644
> --- a/arch/riscv/mm/context.c
> +++ b/arch/riscv/mm/context.c
> @@ -64,7 +64,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
>   	 * privileged ISA 1.10 yet.
>   	 */
>   	csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
> -	local_flush_tlb_all();
> +
> +	/*
> +	 * sfence.vma after SATP write. We call it on MM context instead of
> +	 * calling local_flush_tlb_all to prevent global mappings from being
> +	 * affected.
> +	 */
> +	local_flush_tlb_mm(next);
>   
>   	flush_icache_deferred(next);
>   }
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> new file mode 100644
> index 000000000000..b4b35e825495
> --- /dev/null
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -0,0 +1,216 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 Gary Guo, University of Cambridge
> + */
> +
> +#include <linux/mm.h>
> +#include <asm/sbi.h>
> +
> +#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1)
> +
> +/*
> + * This controls the maximum amount of page-level sfence.vma that the kernel
> + * can issue when the kernel needs to flush a range from the TLB.  If the size
> + * of range goes beyond this threshold, a full sfence.vma is issued.
> + *
> + * Increase this number can negatively impact performance on implemntations
> + * where sfence.vma's address operand is ignored and always perform a global
> + * TLB flush.  On the other hand, implementations with page-level TLB flush
> + * support can benefit from a larger number.
> + */
> +static unsigned long tlbi_range_threshold = PAGE_SIZE;
> +
> +static int __init setup_tlbi_max_ops(char *str)
> +{
> +	int value = 0;
> +
> +	get_option(&str, &value);
> +
> +	/*
> +	 * This value cannot be greater or equal to PTRS_PER_PTE, as we need
> +	 * to full flush for any non-leaf page table change. The value has also
> +	 * be at least 1.
> +	 */
> +	if (value >= PTRS_PER_PTE || value < 1)
> +		return 0;
> +
> +	tlbi_range_threshold = value * PAGE_SIZE;
> +	return 1;
> +}
> +__setup("tlbi_max_ops=", setup_tlbi_max_ops);
> +
> +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +	unsigned long end)
> +{
> +	if (end - start > tlbi_range_threshold) {
> +		local_flush_tlb_mm(vma->vm_mm);
> +		return;
> +	}
> +
> +	while (start < end) {
> +		__asm__ __volatile__ ("sfence.vma %0, %1"
> +				      : : "r" (start), "r" (0)
> +				      : "memory");
> +		start += PAGE_SIZE;
> +	}
> +}
> +
> +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +	if (end - start > tlbi_range_threshold) {
> +		local_flush_tlb_all();
> +		return;
> +	}
> +
> +	while (start < end) {
> +		__asm__ __volatile__ ("sfence.vma %0"
> +				      : : "r" (start)
> +				      : "memory");
> +		start += PAGE_SIZE;
> +	}
> +}
> +
> +#ifdef CONFIG_SMP
> +
> +/*
> + * BBL/OpenSBI are currently ignoring ASID and address range provided
> + * by SBI call argument, and do a full TLB flush instead. This may
> + * negatively impact performance on implementations with page-level
> + * sfence.vma support.
> + *
> + * We provide an IPI-based remote shootdown implementation to improve
> + * performance on implementations with page-level sfence.vma, and also to
> + * allow testing of these implementations.
> + *
> + * This parameter allows the approach (IPI/SBI) to be specified using boot
> + * cmdline.
> + */
> +static bool tlbi_ipi = true;
> +
> +static int __init setup_tlbi_method(char *str)
> +{
> +	if (strcmp(str, "ipi") == 0)
> +		tlbi_ipi = true;
> +	else if (strcmp(str, "sbi") == 0)
> +		tlbi_ipi = false;
> +	else
> +		return 0;
> +
> +	return 1;
> +}
> +__setup("tlbi_method=", setup_tlbi_method);
> +
> +
> +struct tlbi {
> +	unsigned long start;
> +	unsigned long size;
> +	unsigned long asid;
> +};
> +
> +static void ipi_remote_sfence_vma(void *info)
> +{
> +	struct tlbi *data = info;
> +	unsigned long start = data->start;
> +	unsigned long size = data->size;
> +	unsigned long i;
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		__asm__ __volatile__ ("sfence.vma %0"
> +				      : : "r" (start + i)
> +				      : "memory");
> +	}
> +}
> +
> +static void ipi_remote_sfence_vma_asid(void *info)
> +{
> +	struct tlbi *data = info;
> +	unsigned long asid = data->asid;
> +	unsigned long start = data->start;
> +	unsigned long size = data->size;
> +	unsigned long i;
> +
> +	/* Flush entire MM context */
> +	if (size == SFENCE_VMA_FLUSH_ALL) {
> +		__asm__ __volatile__ ("sfence.vma x0, %0"
> +				      : : "r" (asid)
> +				      : "memory");
> +		return;
> +	}
> +
> +	for (i = 0; i < size; i += PAGE_SIZE) {
> +		__asm__ __volatile__ ("sfence.vma %0, %1"
> +				      : : "r" (start + i), "r" (asid)
> +				      : "memory");
> +	}
> +}
> +
> +static void remote_sfence_vma(unsigned long start, unsigned long size)
> +{
> +	if (tlbi_ipi) {
> +		struct tlbi info = {
> +			.start = start,
> +			.size = size,
> +		};
> +		on_each_cpu(ipi_remote_sfence_vma, &info, 1);
> +	} else
> +		sbi_remote_sfence_vma(NULL, start, size);
> +}
> +
> +static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
> +		unsigned long size, unsigned long asid)
> +{
> +	if (tlbi_ipi) {
> +		struct tlbi info = {
> +			.start = start,
> +			.size = size,
> +			.asid = asid,
> +		};
> +		on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
> +	} else {
> +		cpumask_t hmask;
> +
> +		cpumask_clear(&hmask);
> +		riscv_cpuid_to_hartid_mask(mask, &hmask);
> +		sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
> +	}
> +}
> +
> +
> +void flush_tlb_all(void)
> +{
> +	sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL);
> +}
> +
> +void flush_tlb_mm(struct mm_struct *mm)
> +{
> +	remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0);
> +}
> +
> +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> +{
> +	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0);
> +}
> +

Can you please rebase on top of the latest master?
The fixmap patchset, which got merged as part of 5.1-rc1, introduced
another flush_tlb_page usage with the old arguments, causing a compilation
failure.

Here is the PR.
https://patchwork.kernel.org/patch/10823195/

or if you prefer the series.

https://patchwork.kernel.org/project/linux-riscv/list/?series=79489

Regards,
Atish
> +
> +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> +	unsigned long end)
> +{
> +	if (end - start > tlbi_range_threshold) {
> +		flush_tlb_mm(vma->vm_mm);
> +		return;
> +	}
> +
> +	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0);
> +}
> +
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +	if (end - start > tlbi_range_threshold) {
> +		flush_tlb_all();
> +		return;
> +	}
> +
> +	remote_sfence_vma(start, end - start);
> +}
> +
> +#endif /* CONFIG_SMP */
>
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..47a8616b9de0 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -279,7 +279,7 @@  static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 	 */
-	local_flush_tlb_page(address);
+	local_flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 54fee0cadb1e..d6c247ce17f3 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -1,22 +1,14 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
  * Copyright (C) 2012 Regents of the University of California
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
  */
 
 #ifndef _ASM_RISCV_TLBFLUSH_H
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include <linux/mm_types.h>
-#include <asm/smp.h>
 
 /*
  * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
@@ -27,53 +19,42 @@  static inline void local_flush_tlb_all(void)
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
 }
 
-/* Flush one page from local TLB */
-static inline void local_flush_tlb_page(unsigned long addr)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
-	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
+	/* Flush ASID 0 so that global mappings are not affected */
+	__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory");
 }
 
-#ifndef CONFIG_SMP
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+	unsigned long addr)
 {
-	local_flush_tlb_all();
+	__asm__ __volatile__ ("sfence.vma %0, %1"
+			      : : "r" (addr), "r" (0)
+			      : "memory");
 }
 
-#define flush_tlb_mm(mm) flush_tlb_all()
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end);
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-#else /* CONFIG_SMP */
-
-#include <asm/sbi.h>
+#ifdef CONFIG_SMP
 
-static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
-				     unsigned long size)
-{
-	struct cpumask hmask;
+void flush_tlb_all(void);
+void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end);
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-	cpumask_clear(&hmask);
-	riscv_cpuid_to_hartid_mask(cmask, &hmask);
-	sbi_remote_sfence_vma(hmask.bits, start, size);
-}
+#else /* CONFIG_SMP */
 
-#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
-#define flush_tlb_range(vma, start, end) \
-	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
-#define flush_tlb_mm(mm) \
-	remote_sfence_vma(mm_cpumask(mm), 0, -1)
+#define flush_tlb_all() local_flush_tlb_all()
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end)
+#define flush_tlb_kernel_range(start, end) \
+	local_flush_tlb_kernel_range(start, end)
 
 #endif /* CONFIG_SMP */
 
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
-	unsigned long end)
-{
-	flush_tlb_all();
-}
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index d75b035786d6..53b68fd3cb45 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -4,3 +4,4 @@  obj-y += extable.o
 obj-y += ioremap.o
 obj-y += cacheflush.o
 obj-y += context.o
+obj-y += tlbflush.o
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index fbb1cfe80267..0f787bcd3a7a 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -64,7 +64,13 @@  void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * privileged ISA 1.10 yet.
 	 */
 	csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
-	local_flush_tlb_all();
+
+	/*
+	 * sfence.vma after SATP write. We call it on MM context instead of
+	 * calling local_flush_tlb_all to prevent global mappings from being
+	 * affected.
+	 */
+	local_flush_tlb_mm(next);
 
 	flush_icache_deferred(next);
 }
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
new file mode 100644
index 000000000000..b4b35e825495
--- /dev/null
+++ b/arch/riscv/mm/tlbflush.c
@@ -0,0 +1,216 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
+ */
+
+#include <linux/mm.h>
+#include <asm/sbi.h>
+
+#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1)
+
+/*
+ * This controls the maximum number of page-level sfence.vma instructions the
+ * kernel issues when it needs to flush a range from the TLB.  If the size of
+ * the range goes beyond this threshold, a full sfence.vma is issued instead.
+ *
+ * Increasing this number can negatively impact performance on implementations
+ * where sfence.vma's address operand is ignored and a global TLB flush is
+ * always performed.  On the other hand, implementations with page-level TLB
+ * flush support can benefit from a larger number.
+ */
+static unsigned long tlbi_range_threshold = PAGE_SIZE;
+
+static int __init setup_tlbi_max_ops(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+
+	/*
+	 * This value cannot be greater than or equal to PTRS_PER_PTE, as we
+	 * need to do a full flush for any non-leaf page table change.  The
+	 * value must also be at least 1.
+	 */
+	if (value >= PTRS_PER_PTE || value < 1)
+		return 0;
+
+	tlbi_range_threshold = value * PAGE_SIZE;
+	return 1;
+}
+__setup("tlbi_max_ops=", setup_tlbi_max_ops);
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		local_flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				      : : "r" (start), "r" (0)
+				      : "memory");
+		start += PAGE_SIZE;
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0"
+				      : : "r" (start)
+				      : "memory");
+		start += PAGE_SIZE;
+	}
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * BBL/OpenSBI are currently ignoring ASID and address range provided
+ * by SBI call argument, and do a full TLB flush instead. This may
+ * negatively impact performance on implementations with page-level
+ * sfence.vma support.
+ *
+ * We provide an IPI-based remote shootdown implementation to improve
+ * performance on implementations with page-level sfence.vma, and also to
+ * allow testing of these implementations.
+ *
+ * This parameter allows the approach (IPI/SBI) to be specified using boot
+ * cmdline.
+ */
+static bool tlbi_ipi = true;
+
+static int __init setup_tlbi_method(char *str)
+{
+	if (strcmp(str, "ipi") == 0)
+		tlbi_ipi = true;
+	else if (strcmp(str, "sbi") == 0)
+		tlbi_ipi = false;
+	else
+		return 0;
+
+	return 1;
+}
+__setup("tlbi_method=", setup_tlbi_method);
+
+
+struct tlbi {
+	unsigned long start;
+	unsigned long size;
+	unsigned long asid;
+};
+
+static void ipi_remote_sfence_vma(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0"
+				      : : "r" (start + i)
+				      : "memory");
+	}
+}
+
+static void ipi_remote_sfence_vma_asid(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long asid = data->asid;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	/* Flush entire MM context */
+	if (size == SFENCE_VMA_FLUSH_ALL) {
+		__asm__ __volatile__ ("sfence.vma x0, %0"
+				      : : "r" (asid)
+				      : "memory");
+		return;
+	}
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				      : : "r" (start + i), "r" (asid)
+				      : "memory");
+	}
+}
+
+static void remote_sfence_vma(unsigned long start, unsigned long size)
+{
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+		};
+		on_each_cpu(ipi_remote_sfence_vma, &info, 1);
+	} else
+		sbi_remote_sfence_vma(NULL, start, size);
+}
+
+static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
+		unsigned long size, unsigned long asid)
+{
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+			.asid = asid,
+		};
+		on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
+	} else {
+		cpumask_t hmask;
+
+		cpumask_clear(&hmask);
+		riscv_cpuid_to_hartid_mask(mask, &hmask);
+		sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
+	}
+}
+
+
+void flush_tlb_all(void)
+{
+	sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0);
+}
+
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+	unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		flush_tlb_all();
+		return;
+	}
+
+	remote_sfence_vma(start, end - start);
+}
+
+#endif /* CONFIG_SMP */