Message ID | 20221013163551.6775-5-palmer@rivosinc.com (mailing list archive) |
---|---|
State | Changes Requested, archived |
Headers | show |
Series | RISC-V Hardware Probing User Interface | expand |
Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt: > This allows userspace to select various routines to use based on the > performance of misaligned access on the target hardware. > > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> [...] > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h > index cbda062de9bd..54bdcf9a5049 100644 > --- a/arch/riscv/include/asm/cpufeature.h > +++ b/arch/riscv/include/asm/cpufeature.h > @@ -18,4 +18,6 @@ struct riscv_cpuinfo { > > DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); > > +DECLARE_PER_CPU(long, misaligned_access_speed); just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo as the central instance for all cpu-related stuff, so misaligned_access_speed could also be part of it? > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 553d755483ed..1599e40cd170 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void) > bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > else > bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > + > + /* > + * Check for the performance of misaligned accesses. > + */ > + cpu = hartid_to_cpuid_map(hartid); > + if (cpu < 0) > + continue; > + > + if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) { I think this wants a "!" in front :-) . of_property_read_string() returns 0 on success, so running this results in a nullptr right now. 
> + if (strcmp(misaligned, "emulated") == 0) > + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED; > + if (strcmp(misaligned, "slow") == 0) > + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW; > + if (strcmp(misaligned, "fast") == 0) > + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST; > + } > } > > /* We don't support systems with F but without D, so mask those out Heiko
On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote: > Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt: >> This allows userspace to select various routines to use based on the >> performance of misaligned access on the target hardware. >> >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > > [...] > >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h >> index cbda062de9bd..54bdcf9a5049 100644 >> --- a/arch/riscv/include/asm/cpufeature.h >> +++ b/arch/riscv/include/asm/cpufeature.h >> @@ -18,4 +18,6 @@ struct riscv_cpuinfo { >> >> DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); >> >> +DECLARE_PER_CPU(long, misaligned_access_speed); > > just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo > as the central instance for all cpu-related stuff, so > misaligned_access_speed could also be part of it? I remember going through this one a few times and ending up here despite some cleaner-looking ways of doing it. That way does look cleaner, though, so I'll give it a shot and we'll see what happens... >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c >> index 553d755483ed..1599e40cd170 100644 >> --- a/arch/riscv/kernel/cpufeature.c >> +++ b/arch/riscv/kernel/cpufeature.c >> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void) >> bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); >> else >> bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); >> + >> + /* >> + * Check for the performance of misaligned accesses. >> + */ >> + cpu = hartid_to_cpuid_map(hartid); >> + if (cpu < 0) >> + continue; >> + >> + if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) { > > I think this wants a "!" in front :-) . > > of_property_read_string() returns 0 on success, so running this > results in a nullptr right now. Thanks. I'd not gotten around to actually running this so I bet it's broken in a bunch of ways. 
Did you try it out? I was really hoping to find some time to get at least the simple stuff in for this cycle, but too many things keep coming up. > >> + if (strcmp(misaligned, "emulated") == 0) >> + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED; >> + if (strcmp(misaligned, "slow") == 0) >> + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW; >> + if (strcmp(misaligned, "fast") == 0) >> + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST; >> + } >> } >> >> /* We don't support systems with F but without D, so mask those out > > > Heiko
Am Dienstag, 29. November 2022, 22:18:18 CET schrieb Palmer Dabbelt: > On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote: > > Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt: > >> This allows userspace to select various routines to use based on the > >> performance of misaligned access on the target hardware. > >> > >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> > > > > [...] > > > >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h > >> index cbda062de9bd..54bdcf9a5049 100644 > >> --- a/arch/riscv/include/asm/cpufeature.h > >> +++ b/arch/riscv/include/asm/cpufeature.h > >> @@ -18,4 +18,6 @@ struct riscv_cpuinfo { > >> > >> DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); > >> > >> +DECLARE_PER_CPU(long, misaligned_access_speed); > > > > just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo > > as the central instance for all cpu-related stuff, so > > misaligned_access_speed could also be part of it? > > I remember going through this one a few times and ending up here despite > some cleaner-looking ways of doing it. That way does look cleaner, > though, so I'll give it a shot and we'll see what happens... > > >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > >> index 553d755483ed..1599e40cd170 100644 > >> --- a/arch/riscv/kernel/cpufeature.c > >> +++ b/arch/riscv/kernel/cpufeature.c > >> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void) > >> bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > >> else > >> bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); > >> + > >> + /* > >> + * Check for the performance of misaligned accesses. > >> + */ > >> + cpu = hartid_to_cpuid_map(hartid); > >> + if (cpu < 0) > >> + continue; > >> + > >> + if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) { > > > > I think this wants a "!" in front :-) . 
> > > > of_property_read_string() returns 0 on success, so running this > > results in a nullptr right now. > > Thanks. I'd not gotten around to actually running this so I bet it's > broken in a bunch of ways. Did you try it out? I was really hoping to > find some time to get at least the simple stuff in for this cycle, but > too many things keep coming up. I ran the code today, so bumped into the "!" issue. I'm not yet "using" it, but that will come in the next days. And no worries about that time-thingy, your series already provides a nice pointer for the general direction to go with things, so that is helpful in its own right. I will simply report any brokenness I find when it happens ;-) . Heiko
On Tue, 29 Nov 2022 14:10:02 PST (-0800), heiko@sntech.de wrote: > Am Dienstag, 29. November 2022, 22:18:18 CET schrieb Palmer Dabbelt: >> On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote: >> > Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt: >> >> This allows userspace to select various routines to use based on the >> >> performance of misaligned access on the target hardware. >> >> >> >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> >> > >> > [...] >> > >> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h >> >> index cbda062de9bd..54bdcf9a5049 100644 >> >> --- a/arch/riscv/include/asm/cpufeature.h >> >> +++ b/arch/riscv/include/asm/cpufeature.h >> >> @@ -18,4 +18,6 @@ struct riscv_cpuinfo { >> >> >> >> DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); >> >> >> >> +DECLARE_PER_CPU(long, misaligned_access_speed); >> > >> > just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo >> > as the central instance for all cpu-related stuff, so >> > misaligned_access_speed could also be part of it? >> >> I remember going through this one a few times and ending up here despite >> some cleaner-looking ways of doing it. That way does look cleaner, >> though, so I'll give it a shot and we'll see what happens... >> >> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c >> >> index 553d755483ed..1599e40cd170 100644 >> >> --- a/arch/riscv/kernel/cpufeature.c >> >> +++ b/arch/riscv/kernel/cpufeature.c >> >> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void) >> >> bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); >> >> else >> >> bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); >> >> + >> >> + /* >> >> + * Check for the performance of misaligned accesses. 
>> >> + */ >> >> + cpu = hartid_to_cpuid_map(hartid); >> >> + if (cpu < 0) >> >> + continue; >> >> + >> >> + if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) { >> > >> > I think this wants a "!" in front :-) . >> > >> > of_property_read_string() returns 0 on success, so running this >> > results in a nullptr right now. >> >> Thanks. I'd not gotten around to actually running this so I bet it's >> broken in a bunch of ways. Did you try it out? I was really hoping to >> find some time to get at least the simple stuff in for this cycle, but >> too many things keep coming up. > > I ran the code today, so bumped into the "!" issue. > I'm not yet "using" it, but that will come in the next days. > > And no worries about that time-thingy, your series already provides > a nice pointer for the general direction to with things, so that is helpful > in its own right. I'm still worried about the release, as it's the last one before the glibc release and we'll need hooks there too. > I will simply report any brokeness I find when it happens ;-) . Awesome, thanks! > > > Heiko
diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst index b182fcf4cd30..f8aa9137d8aa 100644 --- a/Documentation/riscv/hwprobe.rst +++ b/Documentation/riscv/hwprobe.rst @@ -47,3 +47,16 @@ The following keys are defined: not minNum/maxNum") of the RISC-V ISA manual. * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by version 2.2 of the RISC-V ISA manual. +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information + about the selected set of processors. + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned + accesses is unknown. + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via + software, either in or below the kernel. These accesses are always + extremely slow. + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in + hardware, but are slower than the corresponding aligned access + sequences. + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in + hardware and are faster than the corresponding aligned access + sequences. 
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index cbda062de9bd..54bdcf9a5049 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -18,4 +18,6 @@ struct riscv_cpuinfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +DECLARE_PER_CPU(long, misaligned_access_speed); + #endif diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 7e52f1e1fe10..4e45e33015bc 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,6 +8,6 @@ #include <uapi/asm/hwprobe.h> -#define RISCV_HWPROBE_MAX_KEY 4 +#define RISCV_HWPROBE_MAX_KEY 5 #endif diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..5d98836cc446 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -26,6 +26,13 @@ struct riscv_ipi_ops { */ extern unsigned long __cpuid_to_hartid_map[NR_CPUS]; #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu] +static inline long hartid_to_cpuid_map(unsigned long hartid) { + long i; + for (i = 0; i < NR_CPUS; ++i) + if (cpuid_to_hartid_map(i) == hartid) + return i; + return -1; +} /* print IPI stats */ void show_ipi_stats(struct seq_file *p, int prec); diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 07309376c6dc..e36b72f4e1af 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -24,8 +24,11 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_KEY_CPUPERF_0 5 +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0) +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0) /* Increase RISCV_HWPROBE_MAX_KEY when adding items. 
*/ - - #endif diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 553d755483ed..1599e40cd170 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -15,6 +15,7 @@ #include <asm/cacheflush.h> #include <asm/errata_list.h> #include <asm/hwcap.h> +#include <asm/hwprobe.h> #include <asm/patch.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -31,6 +32,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; __ro_after_init DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); EXPORT_SYMBOL(riscv_isa_ext_keys); +/* Performance information */ +DEFINE_PER_CPU(long, misaligned_access_speed); + /** * riscv_isa_extension_base() - Get base extension word * @@ -71,11 +75,11 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); void __init riscv_fill_hwcap(void) { struct device_node *node; - const char *isa; + const char *isa, *misaligned; char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; static unsigned long isa2hwcap[256] = {0}; - unsigned long hartid; + unsigned long hartid, cpu; isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void) bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); else bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + + /* + * Check for the performance of misaligned accesses. 
+ */ + cpu = hartid_to_cpuid_map(hartid); + if (cpu < 0) + continue; + + if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) { + if (strcmp(misaligned, "emulated") == 0) + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED; + if (strcmp(misaligned, "slow") == 0) + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW; + if (strcmp(misaligned, "fast") == 0) + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST; + } } /* We don't support systems with F but without D, so mask those out diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 337e3f2a17c8..fc9e8cd1589b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -139,6 +139,24 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key, return set_hwprobe(pair, key, id); } +static long hwprobe_misaligned(cpumask_t *cpus) +{ + long cpu, perf = -1; + + for_each_cpu(cpu, cpus) { + long this_perf = per_cpu(misaligned_access_speed, cpu); + if (perf == -1) + perf = this_perf; + + if (perf != this_perf) + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + } + + if (perf == -1) + return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return perf; +} + static long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, long key_offset, long cpu_count, @@ -207,6 +225,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count, ret = set_hwprobe(pairs + out, k, val); } break; + + case RISCV_HWPROBE_KEY_CPUPERF_0: + set_hwprobe(pairs + out, k, hwprobe_misaligned(&cpus)); + break; } if (ret < 0)
This allows userspace to select various routines to use based on the performance of misaligned access on the target hardware. Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com> --- Documentation/riscv/hwprobe.rst | 13 +++++++++++++ arch/riscv/include/asm/cpufeature.h | 2 ++ arch/riscv/include/asm/hwprobe.h | 2 +- arch/riscv/include/asm/smp.h | 7 +++++++ arch/riscv/include/uapi/asm/hwprobe.h | 7 +++++-- arch/riscv/kernel/cpufeature.c | 24 ++++++++++++++++++++++-- arch/riscv/kernel/sys_riscv.c | 22 ++++++++++++++++++++++ 7 files changed, 72 insertions(+), 5 deletions(-)