@@ -701,7 +701,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
* the extra traps reduce performance. So, eagerly SFENCE.VMA.
*/
while (nr--)
- local_flush_tlb_page(address + nr * PAGE_SIZE);
+ local_flush_tlb_page(address + nr * PAGE_SIZE, PAGE_SIZE);
svvptc:;
/*
@@ -719,9 +719,12 @@ svvptc:;
static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
- pte_t *ptep = (pte_t *)pmdp;
+ asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+ : : : : svvptc);
- update_mmu_cache(vma, address, ptep);
+ local_flush_tlb_page(address, PMD_SIZE);
+
+svvptc:;
}
#define __HAVE_ARCH_PTE_SAME
@@ -29,18 +29,32 @@ static inline void local_flush_tlb_all_asid(unsigned long asid)
}
/* Flush one page from local TLB */
-static inline void local_flush_tlb_page(unsigned long addr)
+static inline void local_flush_tlb_page(unsigned long addr,
+ unsigned long page_size)
{
- ALT_SFENCE_VMA_ADDR(addr);
+ unsigned int i;
+ unsigned long hw_page_num = 1 << (PAGE_SHIFT - HW_PAGE_SHIFT);
+ unsigned long hw_page_size = page_size >> (PAGE_SHIFT - HW_PAGE_SHIFT);
+
+ for (i = 0; i < hw_page_num; i++, addr += hw_page_size)
+ ALT_SFENCE_VMA_ADDR(addr);
}
static inline void local_flush_tlb_page_asid(unsigned long addr,
+ unsigned long page_size,
unsigned long asid)
{
- if (asid != FLUSH_TLB_NO_ASID)
- ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
- else
- local_flush_tlb_page(addr);
+ unsigned int i;
+ unsigned long hw_page_num, hw_page_size;
+
+ if (asid != FLUSH_TLB_NO_ASID) {
+ hw_page_num = 1 << (PAGE_SHIFT - HW_PAGE_SHIFT);
+ hw_page_size = page_size >> (PAGE_SHIFT - HW_PAGE_SHIFT);
+
+ for (i = 0; i < hw_page_num; i++, addr += hw_page_size)
+ ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
+ } else
+ local_flush_tlb_page(addr, page_size);
}
void flush_tlb_all(void);
@@ -118,7 +118,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
pmd_t *pmd_k;
pte_t *pte_k;
int index;
- unsigned long pfn;
+ unsigned long pfn, page_size;
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
@@ -154,8 +154,10 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
no_context(regs, addr);
return;
}
- if (pud_leaf(pudp_get(pud_k)))
+ if (pud_leaf(pudp_get(pud_k))) {
+ page_size = PUD_SIZE;
goto flush_tlb;
+ }
/*
* Since the vmalloc area is global, it is unnecessary
@@ -166,8 +168,10 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
no_context(regs, addr);
return;
}
- if (pmd_leaf(pmdp_get(pmd_k)))
+ if (pmd_leaf(pmdp_get(pmd_k))) {
+ page_size = PMD_SIZE;
goto flush_tlb;
+ }
/*
* Make sure the actual PTE exists as well to
@@ -180,6 +184,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
no_context(regs, addr);
return;
}
+ page_size = PAGE_SIZE;
/*
* The kernel assumes that TLBs don't cache invalid
@@ -188,7 +193,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* necessary even after writing invalid entries.
*/
flush_tlb:
- local_flush_tlb_page(addr);
+ local_flush_tlb_page(addr, page_size);
}
static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
@@ -356,7 +356,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
else
pte_clear(&init_mm, addr, ptep);
- local_flush_tlb_page(addr);
+ local_flush_tlb_page(addr, PAGE_SIZE);
}
static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
@@ -27,7 +27,7 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
}
for (i = 0; i < nr_ptes_in_range; ++i) {
- local_flush_tlb_page_asid(start, asid);
+ local_flush_tlb_page_asid(start, stride, asid);
start += stride;
}
}
@@ -36,7 +36,7 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
unsigned long size, unsigned long stride, unsigned long asid)
{
if (size <= stride)
- local_flush_tlb_page_asid(start, asid);
+ local_flush_tlb_page_asid(start, stride, asid);
else if (size == FLUSH_TLB_MAX_SIZE)
local_flush_tlb_all_asid(asid);
else
@@ -126,14 +126,7 @@ void flush_tlb_mm_range(struct mm_struct *mm,
start, end - start, page_size);
}
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
- __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
- addr, PAGE_SIZE, PAGE_SIZE);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static inline unsigned long local_flush_tlb_page_size(struct vm_area_struct *vma)
{
unsigned long stride_size;
@@ -161,6 +154,24 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
}
+ return stride_size;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ unsigned long page_size;
+
+ page_size = local_flush_tlb_page_size(vma);
+ __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+ addr, page_size, page_size);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ unsigned long stride_size;
+
+ stride_size = local_flush_tlb_page_size(vma);
__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
start, end - start, stride_size);
}
When flushing the TLB for a page corresponding to a certain address, the CPU
actually only flushes the TLB entry of the first 4K hardware page. This commit
reimplements the TLB flushing functions to flush the TLB entries of all
hardware pages within the same software page.

Signed-off-by: Xu Lu <luxu.kernel@bytedance.com>
---
 arch/riscv/include/asm/pgtable.h  |  9 ++++++---
 arch/riscv/include/asm/tlbflush.h | 26 ++++++++++++++++++++------
 arch/riscv/mm/fault.c             | 13 +++++++++----
 arch/riscv/mm/init.c              |  2 +-
 arch/riscv/mm/tlbflush.c          | 31 +++++++++++++++++++++----------
 5 files changed, 57 insertions(+), 24 deletions(-)
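
As a review aid, here is a minimal userspace sketch of the hardware-page
iteration introduced in the reworked local_flush_tlb_page(). It assumes a 64K
software PAGE_SIZE carried on 4K hardware pages (PAGE_SHIFT = 16,
HW_PAGE_SHIFT = 12), and the model_* names are made up for illustration only;
in the kernel the loop body is ALT_SFENCE_VMA_ADDR(addr).

	/*
	 * Userspace model of the per-hardware-page flush loop. The shift
	 * constants below are assumptions for the sketch, not taken from
	 * the patch itself.
	 */
	#include <stdio.h>

	#define HW_PAGE_SHIFT	12UL			/* assumed 4K hardware page */
	#define PAGE_SHIFT	16UL			/* assumed 64K software page */
	#define PAGE_SIZE	(1UL << PAGE_SHIFT)

	static void model_sfence_vma(unsigned long addr)
	{
		/* Stands in for one ALT_SFENCE_VMA_ADDR(addr). */
		printf("sfence.vma %#lx\n", addr);
	}

	static void model_flush_tlb_page(unsigned long addr, unsigned long page_size)
	{
		unsigned long hw_page_num = 1UL << (PAGE_SHIFT - HW_PAGE_SHIFT);
		unsigned long hw_page_size = page_size >> (PAGE_SHIFT - HW_PAGE_SHIFT);
		unsigned long i;

		/* One flush per hardware page backing the software page. */
		for (i = 0; i < hw_page_num; i++, addr += hw_page_size)
			model_sfence_vma(addr);
	}

	int main(void)
	{
		/* Emits 16 flushes, 4K apart, covering one 64K software page. */
		model_flush_tlb_page(0x100000UL, PAGE_SIZE);
		return 0;
	}

With these assumed shifts, a single SFENCE.VMA on the base address would only
cover the first 4K hardware page of the 64K software page, which is the gap
this patch closes.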