Message ID | 20190810014309.20838-1-atish.patra@wdc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | RISC-V: Issue a local tlb flush if possible. | expand |
On Sat, Aug 10, 2019 at 7:13 AM Atish Patra <atish.patra@wdc.com> wrote: > > In RISC-V, tlb flush happens via SBI which is expensive. > If the target cpumask contains a local hartid, some cost > can be saved by issuing a local tlb flush as we do that > in OpenSBI anyways. > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > --- > arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++---- > 1 file changed, 29 insertions(+), 4 deletions(-) > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 687dd19735a7..b32ba4fa5888 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -8,6 +8,7 @@ > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > +#include <linux/sched.h> > #include <asm/smp.h> > > /* > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > unsigned long size) > { > struct cpumask hmask; > + struct cpumask tmask; > + int cpuid = smp_processor_id(); > > cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > + cpumask_clear(&tmask); > + > + if (cmask) > + cpumask_copy(&tmask, cmask); > + else > + cpumask_copy(&tmask, cpu_online_mask); This can be further simplified. We can totally avoid tmask, cpumask_copy(), and cpumask_clear() by directly updating hmask. In addition to this patch, we should also handle the case of empty hart mask in OpenSBI. > + > + if (cpumask_test_cpu(cpuid, &tmask)) { > + /* Save trap cost by issuing a local tlb flush here */ > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > + local_flush_tlb_all(); > + else if (size == PAGE_SIZE) > + local_flush_tlb_page(start); > + cpumask_clear_cpu(cpuid, &tmask); > + } else if (cpumask_empty(&tmask)) { > + /* cpumask is empty. 
So just do a local flush */ > + local_flush_tlb_all(); > + return; > + } > + > + if (!cpumask_empty(&tmask)) { > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > + sbi_remote_sfence_vma(hmask.bits, start, size); > + } > } > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE) > #define flush_tlb_range(vma, start, end) \ > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > #define flush_tlb_mm(mm) \ > -- > 2.21.0 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv Regards, Anup
On 8/9/19, 8:30 PM, "Anup Patel" <anup@brainfault.org> wrote: On Sat, Aug 10, 2019 at 7:13 AM Atish Patra <atish.patra@wdc.com> wrote: > > In RISC-V, tlb flush happens via SBI which is expensive. > If the target cpumask contains a local hartid, some cost > can be saved by issuing a local tlb flush as we do that > in OpenSBI anyways. > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > --- > arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++---- > 1 file changed, 29 insertions(+), 4 deletions(-) > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 687dd19735a7..b32ba4fa5888 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -8,6 +8,7 @@ > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > +#include <linux/sched.h> > #include <asm/smp.h> > > /* > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > unsigned long size) > { > struct cpumask hmask; > + struct cpumask tmask; > + int cpuid = smp_processor_id(); > > cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > + cpumask_clear(&tmask); > + > + if (cmask) > + cpumask_copy(&tmask, cmask); > + else > + cpumask_copy(&tmask, cpu_online_mask); This can be further simplified. We can totally avoid tmask, cpumask_copy(), and cpumask_clear() by directly updating hmask. Ahh yes. Thanks for pointing out. In addition to this patch, we should also handle the case of empty hart mask in OpenSBI. Yes. I have few other fixes as well (around fifo race condition and local flushing in OpenSBI). I will send them soon. 
Regards, Atish > + > + if (cpumask_test_cpu(cpuid, &tmask)) { > + /* Save trap cost by issuing a local tlb flush here */ > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > + local_flush_tlb_all(); > + else if (size == PAGE_SIZE) > + local_flush_tlb_page(start); > + cpumask_clear_cpu(cpuid, &tmask); > + } else if (cpumask_empty(&tmask)) { > + /* cpumask is empty. So just do a local flush */ > + local_flush_tlb_all(); > + return; > + } > + > + if (!cpumask_empty(&tmask)) { > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > + sbi_remote_sfence_vma(hmask.bits, start, size); > + } > } > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE) > #define flush_tlb_range(vma, start, end) \ > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > #define flush_tlb_mm(mm) \ > -- > 2.21.0 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv Regards, Anup
On Aug 09 2019, Atish Patra <atish.patra@wdc.com> wrote: > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > unsigned long size) > { > struct cpumask hmask; > + struct cpumask tmask; > + int cpuid = smp_processor_id(); > > cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > + cpumask_clear(&tmask); > + > + if (cmask) > + cpumask_copy(&tmask, cmask); > + else > + cpumask_copy(&tmask, cpu_online_mask); > + > + if (cpumask_test_cpu(cpuid, &tmask)) { > + /* Save trap cost by issuing a local tlb flush here */ > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > + local_flush_tlb_all(); > + else if (size == PAGE_SIZE) > + local_flush_tlb_page(start); > + cpumask_clear_cpu(cpuid, &tmask); > + } else if (cpumask_empty(&tmask)) { > + /* cpumask is empty. So just do a local flush */ > + local_flush_tlb_all(); > + return; > + } > + > + if (!cpumask_empty(&tmask)) { > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > + sbi_remote_sfence_vma(hmask.bits, start, size); > + } > } I think it's becoming too big to be inline. Andreas.
On 8/9/19, 6:43 PM, "Atish Patra" <Atish.Patra@wdc.com> wrote:
In RISC-V, tlb flush happens via SBI which is expensive.
If the target cpumask contains a local hartid, some cost
can be saved by issuing a local tlb flush as we do that
in OpenSBI anyway.
Signed-off-by: Atish Patra <atish.patra@wdc.com>
---
arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++----
1 file changed, 29 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 687dd19735a7..b32ba4fa5888 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -8,6 +8,7 @@
#define _ASM_RISCV_TLBFLUSH_H
#include <linux/mm_types.h>
+#include <linux/sched.h>
#include <asm/smp.h>
/*
@@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
unsigned long size)
{
struct cpumask hmask;
+ struct cpumask tmask;
+ int cpuid = smp_processor_id();
cpumask_clear(&hmask);
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(hmask.bits, start, size);
+ cpumask_clear(&tmask);
+
+ if (cmask)
+ cpumask_copy(&tmask, cmask);
+ else
+ cpumask_copy(&tmask, cpu_online_mask);
+
+ if (cpumask_test_cpu(cpuid, &tmask)) {
+ /* Save trap cost by issuing a local tlb flush here */
+ if ((start == 0 && size == -1) || (size > PAGE_SIZE))
+ local_flush_tlb_all();
+ else if (size == PAGE_SIZE)
+ local_flush_tlb_page(start);
+ cpumask_clear_cpu(cpuid, &tmask);
+ } else if (cpumask_empty(&tmask)) {
+ /* cpumask is empty. So just do a local flush */
+ local_flush_tlb_all();
+ return;
+ }
+
+ if (!cpumask_empty(&tmask)) {
+ riscv_cpuid_to_hartid_mask(&tmask, &hmask);
+ sbi_remote_sfence_vma(hmask.bits, start, size);
+ }
}
-#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
+#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1)
+#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE)
I did not see Paul's patch to fix this. I will remove this in v2 and rebase on top of Paul's patch.
Regards,
Atish
#define flush_tlb_range(vma, start, end) \
remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
#define flush_tlb_mm(mm) \
--
2.21.0
I agree with the comment that we really should move this out of line now, and also that we can simplify it further, which also includes not bothering with the SBI call if we were the only online CPU. I also thing we need to use get_cpu/put_cpu to be preemption safe. Also why would we need to do a local flush if we have a mask that doesn't include the local CPU? How about something like: void __riscv_flush_tlb(struct cpumask *cpumask, unsigned long start, unsigned long size) { unsigned int cpu; if (!cpumask) cpumask = cpu_online_mask; cpu = get_cpu(); if (!cpumask || cpumask_test_cpu(cpu, cpumask) { if ((start == 0 && size == -1) || size > PAGE_SIZE) local_flush_tlb_all(); else if (size == PAGE_SIZE) local_flush_tlb_page(start); cpumask_clear_cpu(cpuid, cpumask); } if (!cpumask_empty(cpumask)) { struct cpumask hmask; riscv_cpuid_to_hartid_mask(cpumask, &hmask); sbi_remote_sfence_vma(hmask.bits, start, size); } put_cpu(); }
> On Aug 9, 2019, at 8:43 PM, Atish Patra <atish.patra@wdc.com> wrote: > > In RISC-V, tlb flush happens via SBI which is expensive. > If the target cpumask contains a local hartid, some cost > can be saved by issuing a local tlb flush as we do that > in OpenSBI anyways. Is there anything other than convention and current usage that prevents the kernel from natively handling TLB flushes without ever making the SBI call? Someone is eventually going to want to run the linux kernel in machine mode, likely for performance and/or security reasons, and this will require flushing TLBs natively anyway. > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > --- > arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++---- > 1 file changed, 29 insertions(+), 4 deletions(-) > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 687dd19735a7..b32ba4fa5888 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -8,6 +8,7 @@ > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > +#include <linux/sched.h> > #include <asm/smp.h> > > /* > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > unsigned long size) > { > struct cpumask hmask; > + struct cpumask tmask; > + int cpuid = smp_processor_id(); > > cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > + cpumask_clear(&tmask); > + > + if (cmask) > + cpumask_copy(&tmask, cmask); > + else > + cpumask_copy(&tmask, cpu_online_mask); > + > + if (cpumask_test_cpu(cpuid, &tmask)) { > + /* Save trap cost by issuing a local tlb flush here */ > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > + local_flush_tlb_all(); > + else if (size == PAGE_SIZE) > + local_flush_tlb_page(start); > + cpumask_clear_cpu(cpuid, &tmask); > + } else if (cpumask_empty(&tmask)) { > + /* cpumask is empty. 
So just do a local flush */ > + local_flush_tlb_all(); > + return; > + } > + > + if (!cpumask_empty(&tmask)) { > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > + sbi_remote_sfence_vma(hmask.bits, start, size); > + } > } > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE) > #define flush_tlb_range(vma, start, end) \ > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > #define flush_tlb_mm(mm) \ > -- > 2.21.0 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Mon, 2019-08-12 at 10:36 -0500, Troy Benjegerdes wrote: > > On Aug 9, 2019, at 8:43 PM, Atish Patra <atish.patra@wdc.com> > > wrote: > > > > In RISC-V, tlb flush happens via SBI which is expensive. > > If the target cpumask contains a local hartid, some cost > > can be saved by issuing a local tlb flush as we do that > > in OpenSBI anyways. > > Is there anything other than convention and current usage that > prevents > the kernel from natively handling TLB flushes without ever making the > SBI > call? > > Someone is eventually going to want to run the linux kernel in > machine mode, > likely for performance and/or security reasons, and this will require > flushing TLBs > natively anyway. > The support is already added by Christoph in nommu series. https://lkml.org/lkml/2019/6/10/935 The idea is to just send IPIs directly in Linux. The same approach is not good in Supervisor mode until we can get rid of IPIs via SBI all together. Otherwise, every tlbflush will be even more expensive as it has to comeback to S mode and then execute sfence.vma. 
> > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > > --- > > arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++- > > --- > > 1 file changed, 29 insertions(+), 4 deletions(-) > > > > diff --git a/arch/riscv/include/asm/tlbflush.h > > b/arch/riscv/include/asm/tlbflush.h > > index 687dd19735a7..b32ba4fa5888 100644 > > --- a/arch/riscv/include/asm/tlbflush.h > > +++ b/arch/riscv/include/asm/tlbflush.h > > @@ -8,6 +8,7 @@ > > #define _ASM_RISCV_TLBFLUSH_H > > > > #include <linux/mm_types.h> > > +#include <linux/sched.h> > > #include <asm/smp.h> > > > > /* > > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct > > cpumask *cmask, unsigned long start, > > unsigned long size) > > { > > struct cpumask hmask; > > + struct cpumask tmask; > > + int cpuid = smp_processor_id(); > > > > cpumask_clear(&hmask); > > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > > - sbi_remote_sfence_vma(hmask.bits, start, size); > > + cpumask_clear(&tmask); > > + > > + if (cmask) > > + cpumask_copy(&tmask, cmask); > > + else > > + cpumask_copy(&tmask, cpu_online_mask); > > + > > + if (cpumask_test_cpu(cpuid, &tmask)) { > > + /* Save trap cost by issuing a local tlb flush here */ > > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > > + local_flush_tlb_all(); > > + else if (size == PAGE_SIZE) > > + local_flush_tlb_page(start); > > + cpumask_clear_cpu(cpuid, &tmask); > > + } else if (cpumask_empty(&tmask)) { > > + /* cpumask is empty. 
So just do a local flush */ > > + local_flush_tlb_all(); > > + return; > > + } > > + > > + if (!cpumask_empty(&tmask)) { > > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > > + sbi_remote_sfence_vma(hmask.bits, start, size); > > + } > > } > > > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, > > (addr) + PAGE_SIZE) > > #define flush_tlb_range(vma, start, end) \ > > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - > > (start)) > > #define flush_tlb_mm(mm) \ > > -- > > 2.21.0 > > > > > > _______________________________________________ > > linux-riscv mailing list > > linux-riscv@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Mon, Aug 12, 2019 at 10:36:25AM -0500, Troy Benjegerdes wrote: > Is there anything other than convention and current usage that prevents > the kernel from natively handling TLB flushes without ever making the SBI > call? Yes and no. In all existing RISC-V implementation remote TLB flushes are simply implementing using IPIs. So you could trivially implement remote TLB flush using IPIs, and in fact Gary Guo posted a series to do that a while ago. But: the RISC privileged spec requires that IPIs are only issued from M-mode and only delivered to M-mode. So what would be a trivial MMIO write plus interupt to wakeup the remote hart actually turns into a dance requiring multiple context switches between privile levels, and without additional optimizations that will be even slower than the current SBI based implementation. I've started a prototype implementation and spec edits to relax this and allow direct IPIs from S-mode to S-mode, which will speed up IPIs by about an order of magnitude, and I hope this will be how future RISC-V implementations work. > Someone is eventually going to want to run the linux kernel in machine mode, > likely for performance and/or security reasons, and this will require flushing TLBs > natively anyway. The nommu ports run in M-mode. But running a MMU-enabled port in M-mode is rather painful if not impossible (trust me, I've tried) due to how the privileged spec says that M-mode generally runs without address translation. There is a workaround using the MPRV bit in mstatus, but even that just uses the address translation for loads and stores, and not for the program text.
On Mon, 2019-08-12 at 07:56 -0700, Christoph Hellwig wrote: > I agree with the comment that we really should move this out of line > now, and sure. > also that we can simplify it further, which also includes > not bothering with the SBI call if we were the only online CPU. I already had that optimization in my patch. > I also thing we need to use get_cpu/put_cpu to be preemption safe. > ok. I will update that. > Also why would we need to do a local flush if we have a mask that > doesn't include the local CPU? > I thought if it recieves an empty cpumask, then it should at least do a local flush is the expected behavior. Are you saying that we should just skip all and return ? > How about something like: > > void __riscv_flush_tlb(struct cpumask *cpumask, unsigned long start, > unsigned long size) > { > unsigned int cpu; > > if (!cpumask) > cpumask = cpu_online_mask; > > cpu = get_cpu(); > if (!cpumask || cpumask_test_cpu(cpu, cpumask) { > if ((start == 0 && size == -1) || size > PAGE_SIZE) > local_flush_tlb_all(); > else if (size == PAGE_SIZE) > local_flush_tlb_page(start); > cpumask_clear_cpu(cpuid, cpumask); This will modify the original cpumask which is not correct. clear part has to be done on hmask to avoid a copy. Regards, Atish > } > > if (!cpumask_empty(cpumask)) { > struct cpumask hmask; > > riscv_cpuid_to_hartid_mask(cpumask, &hmask); > sbi_remote_sfence_vma(hmask.bits, start, size); > } > put_cpu(); > }
On Tue, Aug 13, 2019 at 12:15:15AM +0000, Atish Patra wrote: > I thought if it recieves an empty cpumask, then it should at least do a > local flush is the expected behavior. Are you saying that we should > just skip all and return ? How could we ever receive an empty cpu mask? I think it could only be empty without the current cpu. At least that is my reading of the callers and a few other implementations. > > if (!cpumask || cpumask_test_cpu(cpu, cpumask) { > > if ((start == 0 && size == -1) || size > PAGE_SIZE) > > local_flush_tlb_all(); > > else if (size == PAGE_SIZE) > > local_flush_tlb_page(start); > > cpumask_clear_cpu(cpuid, cpumask); > > This will modify the original cpumask which is not correct. clear part > has to be done on hmask to avoid a copy. Indeed. But looking at the x86 tlbflush implementation it seems like we could use cpumask_any_but() to avoid having to modify the passed in cpumask.
Hi Atish, On Fri, 9 Aug 2019, Atish Patra wrote: > In RISC-V, tlb flush happens via SBI which is expensive. > If the target cpumask contains a local hartid, some cost > can be saved by issuing a local tlb flush as we do that > in OpenSBI anyways. > > Signed-off-by: Atish Patra <atish.patra@wdc.com> A few brief comments/questions beyond the ones that others have mentioned: 1. At some point, some RISC-V systems may implement this SBI call in hardware, rather than in software. Then this might wind up becoming a de-optimization. I don't think there's anything to do about that in code right now, but it might be worth adding a comment, and thinking about how that case might be handled in the platform specification group. 2. If this patch masks or reduces the likelihood of hitting the TLB-related crashes that we're seeing, we probably will want to hold off on merging this one until we're relatively certain that those other problems have been fixed. > --- > arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++---- > 1 file changed, 29 insertions(+), 4 deletions(-) > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 687dd19735a7..b32ba4fa5888 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -8,6 +8,7 @@ > #define _ASM_RISCV_TLBFLUSH_H > > #include <linux/mm_types.h> > +#include <linux/sched.h> > #include <asm/smp.h> > > /* > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, > unsigned long size) > { > struct cpumask hmask; > + struct cpumask tmask; > + int cpuid = smp_processor_id(); > > cpumask_clear(&hmask); > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(hmask.bits, start, size); > + cpumask_clear(&tmask); > + > + if (cmask) > + cpumask_copy(&tmask, cmask); > + else > + cpumask_copy(&tmask, cpu_online_mask); > + > + if (cpumask_test_cpu(cpuid, &tmask)) { > + /* Save trap cost by issuing a local 
tlb flush here */ > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > + local_flush_tlb_all(); > + else if (size == PAGE_SIZE) > + local_flush_tlb_page(start); > + cpumask_clear_cpu(cpuid, &tmask); > + } else if (cpumask_empty(&tmask)) { > + /* cpumask is empty. So just do a local flush */ > + local_flush_tlb_all(); > + return; > + } > + > + if (!cpumask_empty(&tmask)) { > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > + sbi_remote_sfence_vma(hmask.bits, start, size); > + } > } > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE) > #define flush_tlb_range(vma, start, end) \ > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) > #define flush_tlb_mm(mm) \ > -- > 2.21.0 > > - Paul
On Tue, 2019-08-13 at 11:25 -0700, Paul Walmsley wrote: > Hi Atish, > > On Fri, 9 Aug 2019, Atish Patra wrote: > > > In RISC-V, tlb flush happens via SBI which is expensive. > > If the target cpumask contains a local hartid, some cost > > can be saved by issuing a local tlb flush as we do that > > in OpenSBI anyways. > > > > Signed-off-by: Atish Patra <atish.patra@wdc.com> > > A few brief comments/questions beyond the ones that others have > mentioned: > > 1. At some point, some RISC-V systems may implement this SBI call in > hardware, rather than in software. Then this might wind up becoming > a > de-optimization. I don't think there's anything to do about that in > code > right now, but it might be worth adding a comment, and thinking about > how > that case might be handled in the platform specification group. I am fine with adding a comment. But I did not understand why this would be deoptimization. I not exactly sure about the syntax of future TLB flush instructions. But if we have a hardware instruction for remote tlb flushing, it should be executed only for the harts other than current hart. Right ? > 2. If this patch masks or reduces the likelihood of hitting the > TLB-related crashes that we're seeing, we probably will want to hold > off > on merging this one until we're relatively certain that those other > problems have been fixed. > Yeah sure. I don't see any stress-ng error but still chasing down the glibc error. 
Regards, Atish > > > > --- > > arch/riscv/include/asm/tlbflush.h | 33 > > +++++++++++++++++++++++++++---- > > 1 file changed, 29 insertions(+), 4 deletions(-) > > > > diff --git a/arch/riscv/include/asm/tlbflush.h > > b/arch/riscv/include/asm/tlbflush.h > > index 687dd19735a7..b32ba4fa5888 100644 > > --- a/arch/riscv/include/asm/tlbflush.h > > +++ b/arch/riscv/include/asm/tlbflush.h > > @@ -8,6 +8,7 @@ > > #define _ASM_RISCV_TLBFLUSH_H > > > > #include <linux/mm_types.h> > > +#include <linux/sched.h> > > #include <asm/smp.h> > > > > /* > > @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct > > cpumask *cmask, unsigned long start, > > unsigned long size) > > { > > struct cpumask hmask; > > + struct cpumask tmask; > > + int cpuid = smp_processor_id(); > > > > cpumask_clear(&hmask); > > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > > - sbi_remote_sfence_vma(hmask.bits, start, size); > > + cpumask_clear(&tmask); > > + > > + if (cmask) > > + cpumask_copy(&tmask, cmask); > > + else > > + cpumask_copy(&tmask, cpu_online_mask); > > + > > + if (cpumask_test_cpu(cpuid, &tmask)) { > > + /* Save trap cost by issuing a local tlb flush here */ > > + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) > > + local_flush_tlb_all(); > > + else if (size == PAGE_SIZE) > > + local_flush_tlb_page(start); > > + cpumask_clear_cpu(cpuid, &tmask); > > + } else if (cpumask_empty(&tmask)) { > > + /* cpumask is empty. 
So just do a local flush */ > > + local_flush_tlb_all(); > > + return; > > + } > > + > > + if (!cpumask_empty(&tmask)) { > > + riscv_cpuid_to_hartid_mask(&tmask, &hmask); > > + sbi_remote_sfence_vma(hmask.bits, start, size); > > + } > > } > > > > -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) > > -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) > > +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) > > +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, > > (addr) + PAGE_SIZE) > > #define flush_tlb_range(vma, start, end) \ > > remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - > > (start)) > > #define flush_tlb_mm(mm) \ > > -- > > 2.21.0 > > > > > > - Paul
On Tue, 2019-08-13 at 07:30 -0700, hch@infradead.org wrote: > On Tue, Aug 13, 2019 at 12:15:15AM +0000, Atish Patra wrote: > > I thought if it recieves an empty cpumask, then it should at least > > do a > > local flush is the expected behavior. Are you saying that we should > > just skip all and return ? > > How could we ever receive an empty cpu mask? I think it could only > be empty without the current cpu. At least that is my reading of > the callers and a few other implementations. > We get ton of them. Here is the stack dump. [ 16.735814] [<ffffffe000035498>] walk_stackframe+0x0/0xa0^M 298436 [ 16.819037] [<ffffffe0000355f8>] show_stack+0x2a/0x34^M 298437 [ 16.899648] [<ffffffe00067b54c>] dump_stack+0x62/0x7c^M 298438 [ 16.977402] [<ffffffe0000ef6f6>] tlb_flush_mmu+0x14a/0x150^M 298439 [ 17.054197] [<ffffffe0000ef7a4>] tlb_finish_mmu+0x42/0x72^M 298440 [ 17.129986] [<ffffffe0000ede7c>] exit_mmap+0x8e/0xfa^M 298441 [ 17.203669] [<ffffffe000037d54>] mmput.part.3+0x1a/0xc4^M 298442 [ 17.275985] [<ffffffe000037e1e>] mmput+0x20/0x28^M 298443 [ 17.345248] [<ffffffe0001143c2>] flush_old_exec+0x418/0x5f8^M 298444 [ 17.415370] [<ffffffe000158408>] load_elf_binary+0x262/0xc84^M 298445 [ 17.483641] [<ffffffe000114614>] search_binary_handler.part.7+0x72/0x172^M 298446 [ 17.552078] [<ffffffe000114bb2>] __do_execve_file+0x40c/0x56a^M 298447 [ 17.617932] [<ffffffe00011503e>] sys_execve+0x26/0x32^M 298448 [ 17.682164] [<ffffffe00003437e>] ret_from_syscall+0x0/0xe^M It looks like it is in the path of clearing the old traces of already run script or program. I am not sure if the cpumask supposed to be empty in this path. Probably we should just issue tlb flush all for all CPUs instead of just the local CPU. 
> > > if (!cpumask || cpumask_test_cpu(cpu, cpumask) { > > > if ((start == 0 && size == -1) || size > PAGE_SIZE) > > > local_flush_tlb_all(); > > > else if (size == PAGE_SIZE) > > > local_flush_tlb_page(start); > > > cpumask_clear_cpu(cpuid, cpumask); > > > > This will modify the original cpumask which is not correct. clear > > part > > has to be done on hmask to avoid a copy. > > Indeed. But looking at the x86 tlbflush implementation it seems like > we > could use cpumask_any_but() to avoid having to modify the passed in > cpumask. Looking at the x86 code, it uses cpumask_any_but to just test if there any other cpu present apart from the current one. If yes, it calls smp_call_function_many which ignores the current cpu and execute tlb flush code on all other cpus. For RISC-V, it has to still send the cpumask containing local cpu and M-mode software may do a local tlb flush the tlbs again for no reason. Regards, Atish
On Thu, Aug 15, 2019 at 08:37:04PM +0000, Atish Patra wrote:
> We get a ton of them. Here is the stack dump.
Looks like we might not need to flush anything at all here as the
mm_struct was never scheduled to run on any cpu?
On Mon, Aug 19, 2019 at 8:16 PM hch@infradead.org <hch@infradead.org> wrote: > > On Thu, Aug 15, 2019 at 08:37:04PM +0000, Atish Patra wrote: > > We get ton of them. Here is the stack dump. > > Looks like we might not need to flush anything at all here as the > mm_struct was never scheduled to run on any cpu? If we were using ASID then yes we don't need to flush anything but currently we don't use ASID due to lack of HW support and HW can certainly do speculatively page table walks so flushing local TLB when MM mask is empty might help. This just my theory and we need to stress test more. Regards, Anup
On Mon, Aug 19, 2019 at 08:39:02PM +0530, Anup Patel wrote: > If we were using ASID then yes we don't need to flush anything > but currently we don't use ASID due to lack of HW support and > HW can certainly do speculatively page table walks so flushing > local TLB when MM mask is empty might help. > > This just my theory and we need to stress test more. Well, when we context switch away from a mm we always flush the local tlb. So either the mm_struct has never been scheduled in, or we alrady did a local_tlb_flush and we context switched it up.
On Mon, 2019-08-19 at 08:10 -0700, hch@infradead.org wrote: > On Mon, Aug 19, 2019 at 08:39:02PM +0530, Anup Patel wrote: > > If we were using ASID then yes we don't need to flush anything > > but currently we don't use ASID due to lack of HW support and > > HW can certainly do speculatively page table walks so flushing > > local TLB when MM mask is empty might help. > > > > This just my theory and we need to stress test more. > > Well, when we context switch away from a mm we always flush the > local tlb. So either the mm_struct has never been scheduled in, Looking at the stack dump, it looks like this is the case. cpumask is empty possibly after a fork/exec situation where forked child is being replaced with actual program that is about to run. I also looked at x86 & powerpc implementation which doesn't seem to do anything special if cpumask is empty. I will send a v2 with no tlb flushing if cpumask is empty. > or we alrady did a local_tlb_flush and we context switched it up. Regards, Atish
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 687dd19735a7..b32ba4fa5888 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_TLBFLUSH_H #include <linux/mm_types.h> +#include <linux/sched.h> #include <asm/smp.h> /* @@ -46,14 +47,38 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start, unsigned long size) { struct cpumask hmask; + struct cpumask tmask; + int cpuid = smp_processor_id(); cpumask_clear(&hmask); - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(hmask.bits, start, size); + cpumask_clear(&tmask); + + if (cmask) + cpumask_copy(&tmask, cmask); + else + cpumask_copy(&tmask, cpu_online_mask); + + if (cpumask_test_cpu(cpuid, &tmask)) { + /* Save trap cost by issuing a local tlb flush here */ + if ((start == 0 && size == -1) || (size > PAGE_SIZE)) + local_flush_tlb_all(); + else if (size == PAGE_SIZE) + local_flush_tlb_page(start); + cpumask_clear_cpu(cpuid, &tmask); + } else if (cpumask_empty(&tmask)) { + /* cpumask is empty. So just do a local flush */ + local_flush_tlb_all(); + return; + } + + if (!cpumask_empty(&tmask)) { + riscv_cpuid_to_hartid_mask(&tmask, &hmask); + sbi_remote_sfence_vma(hmask.bits, start, size); + } } -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1) -#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0) +#define flush_tlb_all() remote_sfence_vma(NULL, 0, -1) +#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, (addr) + PAGE_SIZE) #define flush_tlb_range(vma, start, end) \ remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start)) #define flush_tlb_mm(mm) \
In RISC-V, tlb flush happens via SBI which is expensive. If the target cpumask contains a local hartid, some cost can be saved by issuing a local tlb flush as we do that in OpenSBI anyways. Signed-off-by: Atish Patra <atish.patra@wdc.com> --- arch/riscv/include/asm/tlbflush.h | 33 +++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-)