diff mbox series

[v1,4/5] RISC-V: hwprobe: Support probing of misaligned accesss performance

Message ID 20221013163551.6775-5-palmer@rivosinc.com (mailing list archive)
State Changes Requested, archived
Headers show
Series RISC-V Hardware Probing User Interface | expand

Commit Message

Palmer Dabbelt Oct. 13, 2022, 4:35 p.m. UTC
This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.

Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/riscv/hwprobe.rst       | 13 +++++++++++++
 arch/riscv/include/asm/cpufeature.h   |  2 ++
 arch/riscv/include/asm/hwprobe.h      |  2 +-
 arch/riscv/include/asm/smp.h          |  7 +++++++
 arch/riscv/include/uapi/asm/hwprobe.h |  7 +++++--
 arch/riscv/kernel/cpufeature.c        | 24 ++++++++++++++++++++++--
 arch/riscv/kernel/sys_riscv.c         | 22 ++++++++++++++++++++++
 7 files changed, 72 insertions(+), 5 deletions(-)

Comments

Heiko Stübner Nov. 29, 2022, 9:09 p.m. UTC | #1
Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
> 
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

[...]

> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index cbda062de9bd..54bdcf9a5049 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -18,4 +18,6 @@ struct riscv_cpuinfo {
>  
>  DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
>  
> +DECLARE_PER_CPU(long, misaligned_access_speed);

just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo
as the central instance for all cpu-related stuff, so
misaligned_access_speed could also be part of it?


> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 553d755483ed..1599e40cd170 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void)
>  			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>  		else
>  			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> +
> +		/*
> +		 * Check for the performance of misaligned accesses.
> +		 */
> +		cpu = hartid_to_cpuid_map(hartid);
> +		if (cpu < 0)
> +			continue;
> +
> +		if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) {

I think this wants a "!" in front :-) .

of_property_read_string() returns 0 on success, so running this
results in a nullptr right now.

> +			if (strcmp(misaligned, "emulated") == 0)
> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED;
> +			if (strcmp(misaligned, "slow") == 0)
> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW;
> +			if (strcmp(misaligned, "fast") == 0)
> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
> +		}
>  	}
>  
>  	/* We don't support systems with F but without D, so mask those out


Heiko
Palmer Dabbelt Nov. 29, 2022, 9:18 p.m. UTC | #2
On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote:
> Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt:
>> This allows userspace to select various routines to use based on the
>> performance of misaligned access on the target hardware.
>>
>> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
>
> [...]
>
>> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> index cbda062de9bd..54bdcf9a5049 100644
>> --- a/arch/riscv/include/asm/cpufeature.h
>> +++ b/arch/riscv/include/asm/cpufeature.h
>> @@ -18,4 +18,6 @@ struct riscv_cpuinfo {
>>
>>  DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
>>
>> +DECLARE_PER_CPU(long, misaligned_access_speed);
>
> just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo
> as the central instance for all cpu-related stuff, so
> misaligned_access_speed could also be part of it?

I remember going through this one a few times and ending up here despite 
some cleaner-looking ways of doing it.  That way does look cleaner, 
though, so I'll give it a shot and we'll see what happens...

>> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>> index 553d755483ed..1599e40cd170 100644
>> --- a/arch/riscv/kernel/cpufeature.c
>> +++ b/arch/riscv/kernel/cpufeature.c
>> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void)
>>  			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>>  		else
>>  			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>> +
>> +		/*
>> +		 * Check for the performance of misaligned accesses.
>> +		 */
>> +		cpu = hartid_to_cpuid_map(hartid);
>> +		if (cpu < 0)
>> +			continue;
>> +
>> +		if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) {
>
> I think this wants a "!" in front :-) .
>
> of_property_read_string() returns 0 on success, so running this
> results in a nullptr right now.

Thanks.  I'd not gotten around to actually running this so I bet it's 
broken in a bunch of ways.  Did you try it out?  I was really hoping to 
find some time to get at least the simple stuff in for this cycle, but 
too many things keep coming up.

>
>> +			if (strcmp(misaligned, "emulated") == 0)
>> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED;
>> +			if (strcmp(misaligned, "slow") == 0)
>> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW;
>> +			if (strcmp(misaligned, "fast") == 0)
>> +				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
>> +		}
>>  	}
>>
>>  	/* We don't support systems with F but without D, so mask those out
>
>
> Heiko
Heiko Stübner Nov. 29, 2022, 10:10 p.m. UTC | #3
Am Dienstag, 29. November 2022, 22:18:18 CET schrieb Palmer Dabbelt:
> On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote:
> > Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt:
> >> This allows userspace to select various routines to use based on the
> >> performance of misaligned access on the target hardware.
> >>
> >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> >
> > [...]
> >
> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> >> index cbda062de9bd..54bdcf9a5049 100644
> >> --- a/arch/riscv/include/asm/cpufeature.h
> >> +++ b/arch/riscv/include/asm/cpufeature.h
> >> @@ -18,4 +18,6 @@ struct riscv_cpuinfo {
> >>
> >>  DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
> >>
> >> +DECLARE_PER_CPU(long, misaligned_access_speed);
> >
> > just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo
> > as the central instance for all cpu-related stuff, so
> > misaligned_access_speed could also be part of it?
> 
> I remember going through this one a few times and ending up here despite 
> some cleaner-looking ways of doing it.  That way does look cleaner, 
> though, so I'll give it a shot and we'll see what happens...
> 
> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> >> index 553d755483ed..1599e40cd170 100644
> >> --- a/arch/riscv/kernel/cpufeature.c
> >> +++ b/arch/riscv/kernel/cpufeature.c
> >> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void)
> >>  			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> >>  		else
> >>  			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> >> +
> >> +		/*
> >> +		 * Check for the performance of misaligned accesses.
> >> +		 */
> >> +		cpu = hartid_to_cpuid_map(hartid);
> >> +		if (cpu < 0)
> >> +			continue;
> >> +
> >> +		if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) {
> >
> > I think this wants a "!" in front :-) .
> >
> > of_property_read_string() returns 0 on success, so running this
> > results in a nullptr right now.
> 
> Thanks.  I'd not gotten around to actually running this so I bet it's 
> broken in a bunch of ways.  Did you try it out?  I was really hoping to 
> find some time to get at least the simple stuff in for this cycle, but 
> too many things keep coming up.

I ran the code today, so bumped into the "!" issue.
I'm not yet "using" it, but that will come in the next days.

And no worries about that time-thingy, your series already provides
a nice pointer for the general direction to with things, so that is helpful
in its own right.

I will simply report any brokeness I find when it happens ;-) .


Heiko
Palmer Dabbelt Nov. 29, 2022, 10:44 p.m. UTC | #4
On Tue, 29 Nov 2022 14:10:02 PST (-0800), heiko@sntech.de wrote:
> Am Dienstag, 29. November 2022, 22:18:18 CET schrieb Palmer Dabbelt:
>> On Tue, 29 Nov 2022 13:09:49 PST (-0800), heiko@sntech.de wrote:
>> > Am Donnerstag, 13. Oktober 2022, 18:35:50 CET schrieb Palmer Dabbelt:
>> >> This allows userspace to select various routines to use based on the
>> >> performance of misaligned access on the target hardware.
>> >>
>> >> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
>> >
>> > [...]
>> >
>> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
>> >> index cbda062de9bd..54bdcf9a5049 100644
>> >> --- a/arch/riscv/include/asm/cpufeature.h
>> >> +++ b/arch/riscv/include/asm/cpufeature.h
>> >> @@ -18,4 +18,6 @@ struct riscv_cpuinfo {
>> >>
>> >>  DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
>> >>
>> >> +DECLARE_PER_CPU(long, misaligned_access_speed);
>> >
>> > just my 2ct ... wouldn't it make sense to have struct riscv_cpuinfo
>> > as the central instance for all cpu-related stuff, so
>> > misaligned_access_speed could also be part of it?
>>
>> I remember going through this one a few times and ending up here despite
>> some cleaner-looking ways of doing it.  That way does look cleaner,
>> though, so I'll give it a shot and we'll see what happens...
>>
>> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
>> >> index 553d755483ed..1599e40cd170 100644
>> >> --- a/arch/riscv/kernel/cpufeature.c
>> >> +++ b/arch/riscv/kernel/cpufeature.c
>> >> @@ -222,6 +226,22 @@ void __init riscv_fill_hwcap(void)
>> >>  			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>> >>  		else
>> >>  			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>> >> +
>> >> +		/*
>> >> +		 * Check for the performance of misaligned accesses.
>> >> +		 */
>> >> +		cpu = hartid_to_cpuid_map(hartid);
>> >> +		if (cpu < 0)
>> >> +			continue;
>> >> +
>> >> +		if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) {
>> >
>> > I think this wants a "!" in front :-) .
>> >
>> > of_property_read_string() returns 0 on success, so running this
>> > results in a nullptr right now.
>>
>> Thanks.  I'd not gotten around to actually running this so I bet it's
>> broken in a bunch of ways.  Did you try it out?  I was really hoping to
>> find some time to get at least the simple stuff in for this cycle, but
>> too many things keep coming up.
>
> I ran the code today, so bumped into the "!" issue.
> I'm not yet "using" it, but that will come in the next days.
>
> And no worries about that time-thingy, your series already provides
> a nice pointer for the general direction to with things, so that is helpful
> in its own right.

I'm still worried about the release, as it's the last one before the 
glibc release and we'll need hooks there too.

> I will simply report any brokeness I find when it happens ;-) .

Awesome, thanks!

>
>
> Heiko
diff mbox series

Patch

diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
index b182fcf4cd30..f8aa9137d8aa 100644
--- a/Documentation/riscv/hwprobe.rst
+++ b/Documentation/riscv/hwprobe.rst
@@ -47,3 +47,16 @@  The following keys are defined:
       not minNum/maxNum") of the RISC-V ISA manual.
     * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
       version 2.2 of the RISC-V ISA manual.
+* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
+  about the selected set of processors.
+    * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
+      accesses is unknown.
+    * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
+      software, either in or below the kernel.  These accesses are always
+      extremely slow.
+    * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
+      hardware, but are slower than the cooresponding aligned accesses
+      sequences.
+    * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
+      hardware and are faster than the cooresponding aligned accesses
+      sequences.
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index cbda062de9bd..54bdcf9a5049 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -18,4 +18,6 @@  struct riscv_cpuinfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
 #endif
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 7e52f1e1fe10..4e45e33015bc 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,6 +8,6 @@ 
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 4
+#define RISCV_HWPROBE_MAX_KEY 5
 
 #endif
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index d3443be7eedc..5d98836cc446 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -26,6 +26,13 @@  struct riscv_ipi_ops {
  */
 extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
 #define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
+static inline long hartid_to_cpuid_map(unsigned long hartid) {
+	long i;
+	for (i = 0; i < NR_CPUS; ++i)
+		if (cpuid_to_hartid_map(i) == hartid)
+			return i;
+	return -1;
+}
 
 /* print IPI stats */
 void show_ipi_stats(struct seq_file *p, int prec);
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index 07309376c6dc..e36b72f4e1af 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -24,8 +24,11 @@  struct riscv_hwprobe {
 #define RISCV_HWPROBE_KEY_IMA_EXT_0	4
 #define		RISCV_HWPROBE_IMA_FD		(1 << 0)
 #define		RISCV_HWPROBE_IMA_C		(1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0	5
+#define		RISCV_HWPROBE_MISALIGNED_UNKNOWN	(0 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_EMULATED	(1 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_SLOW		(2 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_FAST		(3 << 0)
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
-
-
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 553d755483ed..1599e40cd170 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -15,6 +15,7 @@ 
 #include <asm/cacheflush.h>
 #include <asm/errata_list.h>
 #include <asm/hwcap.h>
+#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -31,6 +32,9 @@  static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 __ro_after_init DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
 EXPORT_SYMBOL(riscv_isa_ext_keys);
 
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -71,11 +75,11 @@  EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
 void __init riscv_fill_hwcap(void)
 {
 	struct device_node *node;
-	const char *isa;
+	const char *isa, *misaligned;
 	char print_str[NUM_ALPHA_EXTS + 1];
 	int i, j, rc;
 	static unsigned long isa2hwcap[256] = {0};
-	unsigned long hartid;
+	unsigned long hartid, cpu;
 
 	isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
 	isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M;
@@ -222,6 +226,22 @@  void __init riscv_fill_hwcap(void)
 			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
 		else
 			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+
+		/*
+		 * Check for the performance of misaligned accesses.
+		 */
+		cpu = hartid_to_cpuid_map(hartid);
+		if (cpu < 0)
+			continue;
+
+		if (of_property_read_string(node, "riscv,misaligned-access-performance", &misaligned)) {
+			if (strcmp(misaligned, "emulated") == 0)
+				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_EMULATED;
+			if (strcmp(misaligned, "slow") == 0)
+				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_SLOW;
+			if (strcmp(misaligned, "fast") == 0)
+				per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
+		}
 	}
 
 	/* We don't support systems with F but without D, so mask those out
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 337e3f2a17c8..fc9e8cd1589b 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -139,6 +139,24 @@  static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
 	return set_hwprobe(pair, key, id);
 }
 
+static long hwprobe_misaligned(cpumask_t *cpus)
+{
+	long cpu, perf = -1;
+
+	for_each_cpu(cpu, cpus) {
+		long this_perf = per_cpu(misaligned_access_speed, cpu);
+		if (perf == -1)
+			perf = this_perf;
+
+		if (perf != this_perf)
+			perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+	}
+
+	if (perf == -1)
+		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+	return perf;
+}
+
 static
 long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
 		      long key_offset, long cpu_count,
@@ -207,6 +225,10 @@  long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
 				ret = set_hwprobe(pairs + out, k, val);
 			}
 			break;
+
+		case RISCV_HWPROBE_KEY_CPUPERF_0:
+			set_hwprobe(pairs + out, k, hwprobe_misaligned(&cpus));
+			break;
 		}
 
 		if (ret < 0)