diff mbox series

[v7] timekeeping: Add raw clock fallback for random_get_entropy()

Message ID 20220505001924.176087-1-Jason@zx2c4.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series [v7] timekeeping: Add raw clock fallback for random_get_entropy() | expand

Commit Message

Jason A. Donenfeld May 5, 2022, 12:19 a.m. UTC
The addition of random_get_entropy_fallback() provides access to
whichever time source has the highest frequency, which is useful for
gathering entropy on platforms without available cycle counters. It's
not necessarily as good as being able to quickly access a cycle counter
that the CPU has, but it's still something, even when it falls back to
being jiffies-based.

In the event that a given arch does not define get_cycles(), falling
back to the get_cycles() default implementation that returns 0 is really
not the best we can do. Instead, at least calling
random_get_entropy_fallback() would be preferable, because that always
needs to return _something_, even falling back to jiffies eventually.
It's not as though random_get_entropy_fallback() is super high precision
or guaranteed to be entropic, but basically anything that's not zero all
the time is better than returning zero all the time.

Finally, since random_get_entropy_fallback() is used during extremely
early boot when randomizing freelists in mm_init(), it can be called
before timekeeping has been initialized. In that case there really is
nothing we can do; jiffies hasn't even started ticking yet. So just give
up and return 0.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Theodore Ts'o <tytso@mit.edu>
---
Changes v6->v7:
- Return 0 if we're called before timekeeping has been initialized,
  reported by Nathan.

 include/linux/timex.h     |  8 ++++++++
 kernel/time/timekeeping.c | 10 ++++++++++
 2 files changed, 18 insertions(+)

Comments

kernel test robot May 6, 2022, 3:20 a.m. UTC | #1
Greeting,

FYI, we noticed the following commit (built with clang-15):

commit: 3aeaac747d194aa3cff28351b4a83267faa11b3a ("[PATCH v7] timekeeping: Add raw clock fallback for random_get_entropy()")
url: https://github.com/intel-lab-lkp/linux/commits/Jason-A-Donenfeld/timekeeping-Add-raw-clock-fallback-for-random_get_entropy/20220505-082221
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 107c948d1d3e61d10aee9d0f7c3d81bbee9842af
patch link: https://lore.kernel.org/lkml/20220505001924.176087-1-Jason@zx2c4.com

in testcase: boot

on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


+---------------------------------+------------+------------+
|                                 | 107c948d1d | 3aeaac747d |
+---------------------------------+------------+------------+
| boot_successes                  | 14         | 0          |
| boot_failures                   | 0          | 12         |
| PANIC:early_exception           | 0          | 12         |
| RIP:random_get_entropy_fallback | 0          | 12         |
| BUG:kernel_hang_in_boot_stage   | 0          | 12         |
+---------------------------------+------------+------------+


If you fix the issue, kindly add following tag
Reported-by: kernel test robot <oliver.sang@intel.com>


[    0.000000][    T0] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000][    T0] BIOS-e820: [mem 0x0000000100000000-0x000000043fffffff] usable
[    0.000000][    T0] printk: debug: ignoring loglevel setting.
[    0.000000][    T0] printk: bootconsole [earlyser0] enabled
[    0.000000][    T0] NX (Execute Disable) protection: active
PANIC: early exception 0x0d IP 10:ffffffff812cef11 error 0 cr2 0xffff888004a3aff8
[    0.000000][    T0] CPU: 0 PID: 0 Comm: swapper Not tainted 5.18.0-rc5-00017-g3aeaac747d19 #4
[ 0.000000][ T0] RIP: 0010:random_get_entropy_fallback (kbuild/src/rand-3/kernel/time/timekeeping.c:194 kbuild/src/rand-3/kernel/time/timekeeping.c:2390) 
[ 0.000000][ T0] Code: 41 5d 41 5e 41 5f 5d c3 90 f3 0f 1e fa 55 48 89 e5 53 48 8b 1d 00 4e df 03 48 89 d8 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 48 89 df e8 d1 2f 3b 00 4c 8b 1b 48 89 df e8 f6
All code
========
   0:	41 5d                	pop    %r13
   2:	41 5e                	pop    %r14
   4:	41 5f                	pop    %r15
   6:	5d                   	pop    %rbp
   7:	c3                   	retq   
   8:	90                   	nop
   9:	f3 0f 1e fa          	endbr64 
   d:	55                   	push   %rbp
   e:	48 89 e5             	mov    %rsp,%rbp
  11:	53                   	push   %rbx
  12:	48 8b 1d 00 4e df 03 	mov    0x3df4e00(%rip),%rbx        # 0x3df4e19
  19:	48 89 d8             	mov    %rbx,%rax
  1c:	48 c1 e8 03          	shr    $0x3,%rax
  20:	48 b9 00 00 00 00 00 	movabs $0xdffffc0000000000,%rcx
  27:	fc ff df 
  2a:*	80 3c 08 00          	cmpb   $0x0,(%rax,%rcx,1)		<-- trapping instruction
  2e:	74 08                	je     0x38
  30:	48 89 df             	mov    %rbx,%rdi
  33:	e8 d1 2f 3b 00       	callq  0x3b3009
  38:	4c 8b 1b             	mov    (%rbx),%r11
  3b:	48 89 df             	mov    %rbx,%rdi
  3e:	e8                   	.byte 0xe8
  3f:	f6                   	.byte 0xf6

Code starting with the faulting instruction
===========================================
   0:	80 3c 08 00          	cmpb   $0x0,(%rax,%rcx,1)
   4:	74 08                	je     0xe
   6:	48 89 df             	mov    %rbx,%rdi
   9:	e8 d1 2f 3b 00       	callq  0x3b2fdf
   e:	4c 8b 1b             	mov    (%rbx),%r11
  11:	48 89 df             	mov    %rbx,%rdi
  14:	e8                   	.byte 0xe8
  15:	f6                   	.byte 0xf6
[    0.000000][    T0] RSP: 0000:ffffffff84a07d70 EFLAGS: 00010046 ORIG_RAX: 0000000000000000
[    0.000000][    T0] RAX: 0000000000000000 RBX: 0000000000000000 RCX: dffffc0000000000
[    0.000000][    T0] RDX: ffffffffff240a7a RSI: 00000000000001da RDI: ffffffffff2408a0
[    0.000000][    T0] RBP: ffffffff84a07d78 R08: 0000000000dbf764 R09: 000000000000000b
[    0.000000][    T0] R10: ffffffff86a00070 R11: ffffffff8583df04 R12: dffffc0000000000
[    0.000000][    T0] R13: ffffffff84a07e50 R14: ffffffffff2408a0 R15: 00000000000001da
[    0.000000][    T0] FS:  0000000000000000(0000) GS:ffffffff84a9e000(0000) knlGS:0000000000000000
[    0.000000][    T0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.000000][    T0] CR2: ffff888004a3aff8 CR3: 0000000005876000 CR4: 00000000000406a0
[    0.000000][    T0] Call Trace:
[    0.000000][    T0]  <TASK>
[ 0.000000][ T0] add_device_randomness (kbuild/src/rand-3/drivers/char/random.c:1028) 
[ 0.000000][ T0] ? dmi_walk_early (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:470) 
[ 0.000000][ T0] dmi_walk_early (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:151) 
[ 0.000000][ T0] dmi_present (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:605) 


To reproduce:

        # build kernel
	cd linux
	cp config-5.18.0-rc5-00017-g3aeaac747d19 .config
	make HOSTCC=clang-15 CC=clang-15 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
	make HOSTCC=clang-15 CC=clang-15 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
	cd <mod-install-dir>
	find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


        git clone https://github.com/intel/lkp-tests.git
        cd lkp-tests
        bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

        # if come across any failure that blocks the test,
        # please remove ~/.lkp and /lkp dir to run from a clean state.
Thomas Gleixner May 6, 2022, 7:59 a.m. UTC | #2
On Fri, May 06 2022 at 11:20, kernel test robot wrote:
> PANIC: early exception 0x0d IP 10:ffffffff812cef11 error 0 cr2 0xffff888004a3aff8
> [ 0.000000][ T0] add_device_randomness (kbuild/src/rand-3/drivers/char/random.c:1028) 
> [ 0.000000][ T0] ? dmi_walk_early (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:470) 
> [ 0.000000][ T0] dmi_walk_early (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:151) 
> [ 0.000000][ T0] dmi_present (kbuild/src/rand-3/drivers/firmware/dmi_scan.c:605)

Duh.

So this wants to be:

--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2380,6 +2381,20 @@ static int timekeeping_validate_timex(co
 	return 0;
 }
 
+/**
+ * random_get_entropy_fallback - Returns the raw clock source value,
+ * used by random.c for platforms with no valid random_get_entropy().
+ */
+unsigned long random_get_entropy_fallback(void)
+{
+	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	if (!timekeeping_suspended && clock)
+		return clock->read(clock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
 
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
Jason A. Donenfeld May 6, 2022, 10:12 a.m. UTC | #3
Hi Thomas,

On Fri, May 06, 2022 at 09:59:13AM +0200, Thomas Gleixner wrote:
> +/**
> + * random_get_entropy_fallback - Returns the raw clock source value,
> + * used by random.c for platforms with no valid random_get_entropy().
> + */
> +unsigned long random_get_entropy_fallback(void)
> +{
> +	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
> +	struct clocksource *clock = READ_ONCE(tkr->clock);
> +
> +	if (!timekeeping_suspended && clock)
> +		return clock->read(clock);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(random_get_entropy_fallback);

I tried to address this already in
<https://lore.kernel.org/lkml/20220505002910.IAcnpEOE2zR2ibERl4Lh3Y_PMmtb0Rf43lVevgztJiM@z/>,
though yours looks better with the READ_ONCE() around clock, and I'll
send a v8 doing it that way. I didn't realize that clock could become
NULL again after becoming non-NULL.

I'm not quite sure I understand the purpose of !timekeeping_suspended
there, though. I'm not seeing the path where reading with it suspended
negatively affects things. I'll take your word for it though.

Jason
Thomas Gleixner May 6, 2022, 4:01 p.m. UTC | #4
On Fri, May 06 2022 at 12:12, Jason A. Donenfeld wrote:
> On Fri, May 06, 2022 at 09:59:13AM +0200, Thomas Gleixner wrote:
>> +/**
>> + * random_get_entropy_fallback - Returns the raw clock source value,
>> + * used by random.c for platforms with no valid random_get_entropy().
>> + */
>> +unsigned long random_get_entropy_fallback(void)
>> +{
>> +	struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono;
>> +	struct clocksource *clock = READ_ONCE(tkr->clock);
>> +
>> +	if (!timekeeping_suspended && clock)
>> +		return clock->read(clock);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
>
> I tried to address this already in
> <https://lore.kernel.org/lkml/20220505002910.IAcnpEOE2zR2ibERl4Lh3Y_PMmtb0Rf43lVevgztJiM@z/>,
> though yours looks better with the READ_ONCE() around clock, and I'll
> send a v8 doing it that way. I didn't realize that clock could become
> NULL again after becoming non-NULL.

This happens at early boot where clock is NULL.

> I'm not quite sure I understand the purpose of !timekeeping_suspended
> there, though. I'm not seeing the path where reading with it suspended
> negatively affects things. I'll take your word for it though.

Some clocks are not accessible during suspend.

Thanks,

        tglx
diff mbox series

Patch

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 5745c90c8800..3871b06bd302 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -62,6 +62,8 @@ 
 #include <linux/types.h>
 #include <linux/param.h>
 
+unsigned long random_get_entropy_fallback(void);
+
 #include <asm/timex.h>
 
 #ifndef random_get_entropy
@@ -74,8 +76,14 @@ 
  *
  * By default we use get_cycles() for this purpose, but individual
  * architectures may override this in their asm/timex.h header file.
+ * If a given arch does not have get_cycles(), then we fallback to
+ * using random_get_entropy_fallback().
  */
+#ifdef get_cycles
 #define random_get_entropy()	((unsigned long)get_cycles())
+#else
+#define random_get_entropy()	random_get_entropy_fallback()
+#endif
 #endif
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dcdcb85121e4..7cd2ec239cae 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -17,6 +17,7 @@ 
 #include <linux/clocksource.h>
 #include <linux/jiffies.h>
 #include <linux/time.h>
+#include <linux/timex.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
@@ -2380,6 +2381,15 @@  static int timekeeping_validate_timex(const struct __kernel_timex *txc)
 	return 0;
 }
 
+/**
+ * random_get_entropy_fallback - Returns the raw clock source value,
+ * used by random.c for platforms with no valid random_get_entropy().
+ */
+unsigned long random_get_entropy_fallback(void)
+{
+	return tk_clock_read(&tk_core.timekeeper.tkr_mono);
+}
+EXPORT_SYMBOL_GPL(random_get_entropy_fallback);
 
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function