
[RFC,04/11] x86/boot/KASLR: Introduce PRNG for faster shuffling

Message ID 20200205223950.1212394-5-kristen@linux.intel.com (mailing list archive)
State New, archived
Series Finer grained kernel address space randomization

Commit Message

Kristen Carlson Accardi Feb. 5, 2020, 10:39 p.m. UTC
From: Kees Cook <keescook@chromium.org>

This *might* improve shuffling speed at boot. Possibly only marginally.
This has not yet been tested, and would need performance testing
to determine whether it helps before merging.

(notes from Kristen) - initial performance tests suggest that any
improvement is indeed marginal. However, this code is still useful
for running with a known seed.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
---
 arch/x86/boot/compressed/kaslr.c |  4 +--
 arch/x86/include/asm/kaslr.h     |  3 +-
 arch/x86/lib/kaslr.c             | 50 +++++++++++++++++++++++++++++++-
 arch/x86/mm/init.c               |  2 +-
 arch/x86/mm/kaslr.c              |  2 +-
 5 files changed, 55 insertions(+), 6 deletions(-)

Comments

Andy Lutomirski Feb. 6, 2020, 1:11 a.m. UTC | #1
> On Feb 5, 2020, at 2:39 PM, Kristen Carlson Accardi <kristen@linux.intel.com> wrote:
> 
> From: Kees Cook <keescook@chromium.org>
> 
> This *might* improve shuffling speed at boot. Possibly only marginally.
> This has not yet been tested, and would need performance testing
> to determine whether it helps before merging.

Ugh, don’t do this. Use a real DRBG.  Someone is going to break the construction in your patch just to prove they can.

ChaCha20 is a good bet.
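
A sketch of what that could look like, reusing chacha20_block() from
include/crypto/chacha.h (which also advances the block counter). The
seeding path, the cc_* names, and the buffering are illustrative
assumptions, not code from this series:

        #include <crypto/chacha.h>      /* CHACHA_STATE_WORDS, chacha20_block() */

        static u32 cc_state[CHACHA_STATE_WORDS]; /* consts | 256-bit key | ctr | nonce */
        static u8 cc_block[CHACHA_BLOCK_SIZE];
        static unsigned int cc_pos = CHACHA_BLOCK_SIZE; /* force refill on first use */

        static void chacha_prng_init(void)
        {
                u64 key[4];
                int i;

                /* Standard ChaCha "expand 32-byte k" constants. */
                cc_state[0] = 0x61707865; cc_state[1] = 0x3320646e;
                cc_state[2] = 0x79622d32; cc_state[3] = 0x6b206574;

                for (i = 0; i < 4; i++) /* 256-bit key from the existing entropy mix */
                        key[i] = kaslr_get_random_seed(NULL);
                memcpy(&cc_state[4], key, sizeof(key));

                memset(&cc_state[12], 0, 4 * sizeof(u32)); /* counter + nonce */
        }

        static u64 chacha_prng_u64(void)
        {
                u64 ret;

                if (cc_pos + sizeof(ret) > CHACHA_BLOCK_SIZE) {
                        chacha20_block(cc_state, cc_block); /* new keystream block */
                        cc_pos = 0;
                }
                memcpy(&ret, cc_block + cc_pos, sizeof(ret));
                cc_pos += sizeof(ret);
                return ret;
        }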

> 
> (notes from Kristen) - initial performance tests suggest that any
> improvement is indeed marginal. However, this code is still useful
> for running with a known seed.
> 
> Signed-off-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
> ---
> arch/x86/boot/compressed/kaslr.c |  4 +--
> arch/x86/include/asm/kaslr.h     |  3 +-
> arch/x86/lib/kaslr.c             | 50 +++++++++++++++++++++++++++++++-
> arch/x86/mm/init.c               |  2 +-
> arch/x86/mm/kaslr.c              |  2 +-
> 5 files changed, 55 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index d7408af55738..ae4dce76a9bd 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -598,7 +598,7 @@ static unsigned long slots_fetch_random(void)
>    if (slot_max == 0)
>        return 0;
> 
> -    slot = kaslr_get_random_long("Physical") % slot_max;
> +    slot = kaslr_get_random_seed("Physical") % slot_max;
> 
>    for (i = 0; i < slot_area_index; i++) {
>        if (slot >= slot_areas[i].num) {
> @@ -880,7 +880,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
>    slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
>         CONFIG_PHYSICAL_ALIGN + 1;
> 
> -    random_addr = kaslr_get_random_long("Virtual") % slots;
> +    random_addr = kaslr_get_random_seed("Virtual") % slots;
> 
>    return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
> }
> diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
> index db7ba2feb947..47d5b25e5b11 100644
> --- a/arch/x86/include/asm/kaslr.h
> +++ b/arch/x86/include/asm/kaslr.h
> @@ -2,7 +2,8 @@
> #ifndef _ASM_KASLR_H_
> #define _ASM_KASLR_H_
> 
> -unsigned long kaslr_get_random_long(const char *purpose);
> +unsigned long kaslr_get_random_seed(const char *purpose);
> +unsigned long kaslr_get_prandom_long(void);
> 
> #ifdef CONFIG_RANDOMIZE_MEMORY
> void kernel_randomize_memory(void);
> diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
> index 2b3eb8c948a3..41b5610855a3 100644
> --- a/arch/x86/lib/kaslr.c
> +++ b/arch/x86/lib/kaslr.c
> @@ -46,7 +46,7 @@ static inline u16 i8254(void)
>    return timer;
> }
> 
> -unsigned long kaslr_get_random_long(const char *purpose)
> +unsigned long kaslr_get_random_seed(const char *purpose)
> {
> #ifdef CONFIG_X86_64
>    const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
> @@ -96,3 +96,51 @@ unsigned long kaslr_get_random_long(const char *purpose)
> 
>    return random;
> }
> +
> +/*
> + * 64bit variant of Bob Jenkins' public domain PRNG
> + * 256 bits of internal state
> + */
> +struct prng_state {
> +    u64 a, b, c, d;
> +};
> +
> +static struct prng_state state;
> +static bool initialized;
> +
> +#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
> +static u64 prng_u64(struct prng_state *x)
> +{
> +    u64 e;
> +
> +    e = x->a - rot(x->b, 7);
> +    x->a = x->b ^ rot(x->c, 13);
> +    x->b = x->c + rot(x->d, 37);
> +    x->c = x->d + e;
> +    x->d = e + x->a;
> +
> +    return x->d;
> +}
> +
> +static void prng_init(struct prng_state *state)
> +{
> +    int i;
> +
> +    state->a = kaslr_get_random_seed(NULL);
> +    state->b = kaslr_get_random_seed(NULL);
> +    state->c = kaslr_get_random_seed(NULL);
> +    state->d = kaslr_get_random_seed(NULL);
> +
> +    for (i = 0; i < 30; ++i)
> +        (void)prng_u64(state);
> +
> +    initialized = true;
> +}
> +
> +unsigned long kaslr_get_prandom_long(void)
> +{
> +    if (!initialized)
> +        prng_init(&state);
> +
> +    return prng_u64(&state);
> +}
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index e7bb483557c9..c974dbab2293 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -722,7 +722,7 @@ void __init poking_init(void)
>     */
>    poking_addr = TASK_UNMAPPED_BASE;
>    if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
> -        poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
> +        poking_addr += (kaslr_get_random_seed("Poking") & PAGE_MASK) %
>            (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
> 
>    if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
> diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
> index dc6182eecefa..b30bd1db7543 100644
> --- a/arch/x86/mm/kaslr.c
> +++ b/arch/x86/mm/kaslr.c
> @@ -123,7 +123,7 @@ void __init kernel_randomize_memory(void)
>    for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
>        remain_entropy -= get_padding(&kaslr_regions[i]);
> 
> -    prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));
> +    prandom_seed_state(&rand_state, kaslr_get_random_seed("Memory"));
> 
>    for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
>        unsigned long entropy;
> -- 
> 2.24.1
>
Jason A. Donenfeld Feb. 6, 2020, 3:10 p.m. UTC | #2
Hey Kees,

On Wed, Feb 05, 2020 at 02:39:43PM -0800, Kristen Carlson Accardi wrote:
> +#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
> +static u64 prng_u64(struct prng_state *x)
> +{
> +	u64 e;
> +
> +	e = x->a - rot(x->b, 7);
> +	x->a = x->b ^ rot(x->c, 13);
> +	x->b = x->c + rot(x->d, 37);
> +	x->c = x->d + e;
> +	x->d = e + x->a;
> +
> +	return x->d;
> +}

I haven't looked closely at where the original entropy sources are
coming from and how all this works, but on first glance, this prng
doesn't look like an especially cryptographically secure one. I realize
that isn't necessarily your intention (you're focused on speed), but
actually might this be sort of important? If I understand correctly, the
objective of this patch set is so that leaking the address of one
function doesn't leak the address of all other functions, as is the case
with fixed-offset kaslr. But if you leak the addresses of _some_ set of
functions, and your prng is bogus, might it be possible to figure out
the rest? For some prngs, if you give me the output stream of a few
numbers, I can predict the rest. For others, it's not this
straightforward, but there are some varieties of similar attacks. If any of that
set of concerns turns out to apply to your prng_u64 here, would that
undermine kaslr in similar ways as the current fixed-offset variety? Or
does it not matter because it's some kind of blinded fixed-size shuffle
with complex reasoning that makes this not a problem?

Jason
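
To make the prediction concern concrete: prng_u64() is a pure function
of its 256-bit state, so an attacker who recovers (or brute-forces)
a, b, c, d at any point can replay every subsequent output exactly. A
trivial userspace illustration of that property, with prng_u64() copied
from the patch and a made-up starting state:

        #include <stdint.h>
        #include <stdio.h>

        struct prng_state { uint64_t a, b, c, d; };
        #define rot(x, k) (((x) << (k)) | ((x) >> (64 - (k))))

        static uint64_t prng_u64(struct prng_state *x)
        {
                uint64_t e = x->a - rot(x->b, 7);

                x->a = x->b ^ rot(x->c, 13);
                x->b = x->c + rot(x->d, 37);
                x->c = x->d + e;
                x->d = e + x->a;
                return x->d;
        }

        int main(void)
        {
                struct prng_state victim = { 1, 2, 3, 4 }; /* unknown to the attacker */
                struct prng_state clone;
                int i;

                for (i = 0; i < 5; i++)
                        prng_u64(&victim);      /* some outputs are produced... */

                clone = victim;                 /* ...then the state leaks */
                for (i = 0; i < 3; i++)         /* every later output is predicted */
                        printf("predicted %#llx, actual %#llx\n",
                               (unsigned long long)prng_u64(&clone),
                               (unsigned long long)prng_u64(&victim));
                return 0;
        }
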
Jean-Philippe Aumasson Feb. 7, 2020, 7:23 a.m. UTC | #3
Let me share my 2 cents:

That permutation might be safe but afaict it hasn't been analyzed wrt
modern cryptographic techniques and there might well be differential
characteristics, statistical biases, etc.

What about just using SipHash's permutation, already in the kernel? It
works on 4*u64 words too, and 6 rounds would be enough.

Doing a basic ops count, we currently have 5 group operations and 3
rotations per round or 150 and 90 for the 30 init rounds. With SipHash it'd
be 48 and 36 with the proposed 6 rounds. Probably insignificant speed wise
as init is only done once but just to show that we'd get both better
security assurance and better performance.
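
For concreteness, the permutation in question is SIPROUND from
lib/siphash.c (rol64 comes from <linux/bitops.h>). Dropped into the
patch's 4*u64 state it might look like the sketch below; the six init
rounds follow the suggestion above, while the two rounds per output are
purely illustrative, not a worked-out design:

        /* SipHash's ARX round, as defined in lib/siphash.c. */
        #define SIPROUND(a, b, c, d) do {                               \
                a += b; b = rol64(b, 13); b ^= a; a = rol64(a, 32);     \
                c += d; d = rol64(d, 16); d ^= c;                       \
                a += d; d = rol64(d, 21); d ^= a;                       \
                c += b; b = rol64(b, 17); b ^= c; c = rol64(c, 32);     \
        } while (0)

        static void prng_init_sip(struct prng_state *s)
        {
                int i;

                s->a = kaslr_get_random_seed(NULL);
                s->b = kaslr_get_random_seed(NULL);
                s->c = kaslr_get_random_seed(NULL);
                s->d = kaslr_get_random_seed(NULL);

                for (i = 0; i < 6; i++)         /* 6 mixing rounds, per above */
                        SIPROUND(s->a, s->b, s->c, s->d);
        }

        static u64 prng_u64_sip(struct prng_state *s)
        {
                SIPROUND(s->a, s->b, s->c, s->d);
                SIPROUND(s->a, s->b, s->c, s->d);
                return s->d;
        }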


On Thu, Feb 6, 2020 at 4:10 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:

> Hey Kees,
>
> On Wed, Feb 05, 2020 at 02:39:43PM -0800, Kristen Carlson Accardi wrote:
> > +#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
> > +static u64 prng_u64(struct prng_state *x)
> > +{
> > +     u64 e;
> > +
> > +     e = x->a - rot(x->b, 7);
> > +     x->a = x->b ^ rot(x->c, 13);
> > +     x->b = x->c + rot(x->d, 37);
> > +     x->c = x->d + e;
> > +     x->d = e + x->a;
> > +
> > +     return x->d;
> > +}
>
> I haven't looked closely at where the original entropy sources are
> coming from and how all this works, but on first glance, this prng
> doesn't look like an especially cryptographically secure one. I realize
> that isn't necessarily your intention (you're focused on speed), but
> actually might this be sort of important? If I understand correctly, the
> objective of this patch set is so that leaking the address of one
> function doesn't leak the address of all other functions, as is the case
> with fixed-offset kaslr. But if you leak the addresses of _some_ set of
> functions, and your prng is bogus, might it be possible to figure out
> the rest? For some prngs, if you give me the output stream of a few
> numbers, I can predict the rest. For others, it's not this
> straightforward, but there are some varieties of similar attacks. If any of that
> set of concerns turns out to apply to your prng_u64 here, would that
> undermine kaslr in similar ways as the current fixed-offset variety? Or
> does it not matter because it's some kind of blinded fixed-size shuffle
> with complex reasoning that makes this not a problem?
>
> Jason
>
Kees Cook Feb. 7, 2020, 9:05 a.m. UTC | #4
On Fri, Feb 07, 2020 at 08:23:53AM +0100, Jean-Philippe Aumasson wrote:
> 
> On Thu, Feb 6, 2020 at 4:10 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> 
> > Hey Kees,
> >
> > On Wed, Feb 05, 2020 at 02:39:43PM -0800, Kristen Carlson Accardi wrote:
> > > +#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
> > > +static u64 prng_u64(struct prng_state *x)
> > > +{
> > > +     u64 e;
> > > +
> > > +     e = x->a - rot(x->b, 7);
> > > +     x->a = x->b ^ rot(x->c, 13);
> > > +     x->b = x->c + rot(x->d, 37);
> > > +     x->c = x->d + e;
> > > +     x->d = e + x->a;
> > > +
> > > +     return x->d;
> > > +}
> >
> > I haven't looked closely at where the original entropy sources are
> > coming from and how all this works, but on first glance, this prng
> > doesn't look like an especially cryptographically secure one. I realize
> > that isn't necessarily your intention (you're focused on speed), but
> > actually might this be sort of important? If I understand correctly, the
> > objective of this patch set is so that leaking the address of one
> > function doesn't leak the address of all other functions, as is the case
> > with fixed-offset kaslr. But if you leak the addresses of _some_ set of
> > functions, and your prng is bogus, might it be possible to figure out
> > the rest? For some prngs, if you give me the output stream of a few
> > numbers, I can predict the rest. For others, it's not this straight
> > forward, but there are some varieties of similar attacks. If any of that
> > set of concerns turns out to apply to your prng_u64 here, would that
> > undermine kaslr in similar ways as the current fixed-offset variety? Or
> > does it not matter because it's some kind of blinded fixed-size shuffle
> > with complex reasoning that makes this not a problem?
> 
> Let me share my 2 cents:
> 
> That permutation might be safe but afaict it hasn't been analyzed wrt
> modern cryptographic techniques and there might well be differential
> characteristics, statistical biases, etc.
> 
> What about just using SipHash's permutation, already in the kernel? It
> works on 4*u64 words too, and 6 rounds would be enough.
> 
> Doing a basic ops count, we currently have 5 group operations and 3
> rotations per round or 150 and 90 for the 30 init rounds. With SipHash it'd
> be 48 and 36 with the proposed 6 rounds. Probably insignificant speed wise
> as init is only done once but just to show that we'd get both better
> security assurance and better performance.

Yeah, this was never meant to be anything but a POC. After timing
tests, it seemed like an unneeded abstraction, but it was kept for this
RFC so it was possible to specify a stable seed at boot for debugging,
etc. I think this patch will not survive to v1. :)
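
For what it's worth, the stable-seed hook could stay as small as the
sketch below. debug_get_fixed_seed() is a hypothetical stand-in for
whatever mechanism (command-line option, compile-time constant) would
supply the known seed; nothing like it appears in the posted series:

        /* Sketch: reproducible layouts for debugging with a fixed seed. */
        static void prng_init(struct prng_state *state)
        {
                u64 seed;
                int i;

                if (debug_get_fixed_seed(&seed)) {      /* hypothetical hook */
                        state->a = seed;
                        state->b = seed ^ 0x9e3779b97f4a7c15UL; /* arbitrary distinct words */
                        state->c = ~seed;
                        state->d = rot(seed, 32);
                } else {                                /* normal path, as in the patch */
                        state->a = kaslr_get_random_seed(NULL);
                        state->b = kaslr_get_random_seed(NULL);
                        state->c = kaslr_get_random_seed(NULL);
                        state->d = kaslr_get_random_seed(NULL);
                }

                for (i = 0; i < 30; ++i)
                        (void)prng_u64(state);

                initialized = true;
        }
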
Kristen Carlson Accardi Feb. 7, 2020, 4:52 p.m. UTC | #5
On Fri, 2020-02-07 at 01:05 -0800, Kees Cook wrote:
> On Fri, Feb 07, 2020 at 08:23:53AM +0100, Jean-Philippe Aumasson
> wrote:
> > On Thu, Feb 6, 2020 at 4:10 PM Jason A. Donenfeld <Jason@zx2c4.com>
> > wrote:
> > 
> > > Hey Kees,
> > > 
> > > On Wed, Feb 05, 2020 at 02:39:43PM -0800, Kristen Carlson Accardi
> > > wrote:
> > > > +#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
> > > > +static u64 prng_u64(struct prng_state *x)
> > > > +{
> > > > +     u64 e;
> > > > +
> > > > +     e = x->a - rot(x->b, 7);
> > > > +     x->a = x->b ^ rot(x->c, 13);
> > > > +     x->b = x->c + rot(x->d, 37);
> > > > +     x->c = x->d + e;
> > > > +     x->d = e + x->a;
> > > > +
> > > > +     return x->d;
> > > > +}
> > > 
> > > I haven't looked closely at where the original entropy sources
> > > are
> > > coming from and how all this works, but on first glance, this
> > > prng
> > > doesn't look like an especially cryptographically secure one. I
> > > realize
> > > that isn't necessarily your intention (you're focused on speed),
> > > but
> > > actually might this be sort of important? If I understand
> > > correctly, the
> > > objective of this patch set is so that leaking the address of one
> > > function doesn't leak the address of all other functions, as is
> > > the case
> > > with fixed-offset kaslr. But if you leak the addresses of _some_
> > > set of
> > > functions, and your prng is bogus, might it be possible to figure
> > > out
> > > the rest? For some prngs, if you give me the output stream of a
> > > few
> > > numbers, I can predict the rest. For others, it's not this
> > > straightforward, but there are some varieties of similar attacks. If any
> > > of that
> > > set of concerns turns out to apply to your prng_u64 here, would
> > > that
> > > undermine kaslr in similar ways as the current fixed-offset
> > > variety? Or
> > > does it not matter because it's some kind of blinded fixed-size
> > > shuffle
> > > with complex reasoning that makes this not a problem?
> > 
> > Let me share my 2 cents:
> > 
> > That permutation might be safe but afaict it hasn't been analyzed
> > wrt
> > modern cryptographic techniques and there might well be
> > differential
> > characteristics, statistical biases, etc.
> > 
> > What about just using SipHash's permutation, already in the kernel?
> > It
> > works on 4*u64 words too, and 6 rounds would be enough.
> > 
> > Doing a basic ops count, we currently have 5 group operations and 3
> > rotations per round or 150 and 90 for the 30 init rounds. With
> > SipHash it'd
> > be 48 and 36 with the proposed 6 rounds. Probably insignificant
> > speed wise
> > as init is only done once but just to show that we'd get both
> > better
> > security assurance and better performance.
> 
> Yeah, this was never meant to be anything but a POC. After timing
> tests, it seemed like an unneeded abstraction, but it was kept for this
> RFC so it was possible to specify a stable seed at boot for debugging,
> etc. I think this patch will not survive to v1. :)

That's right, I'm going to drop it and go with the ChaCha20
implementation as was suggested.

Patch

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index d7408af55738..ae4dce76a9bd 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -598,7 +598,7 @@  static unsigned long slots_fetch_random(void)
 	if (slot_max == 0)
 		return 0;
 
-	slot = kaslr_get_random_long("Physical") % slot_max;
+	slot = kaslr_get_random_seed("Physical") % slot_max;
 
 	for (i = 0; i < slot_area_index; i++) {
 		if (slot >= slot_areas[i].num) {
@@ -880,7 +880,7 @@  static unsigned long find_random_virt_addr(unsigned long minimum,
 	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
 		 CONFIG_PHYSICAL_ALIGN + 1;
 
-	random_addr = kaslr_get_random_long("Virtual") % slots;
+	random_addr = kaslr_get_random_seed("Virtual") % slots;
 
 	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
 }
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index db7ba2feb947..47d5b25e5b11 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -2,7 +2,8 @@ 
 #ifndef _ASM_KASLR_H_
 #define _ASM_KASLR_H_
 
-unsigned long kaslr_get_random_long(const char *purpose);
+unsigned long kaslr_get_random_seed(const char *purpose);
+unsigned long kaslr_get_prandom_long(void);
 
 #ifdef CONFIG_RANDOMIZE_MEMORY
 void kernel_randomize_memory(void);
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index 2b3eb8c948a3..41b5610855a3 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -46,7 +46,7 @@  static inline u16 i8254(void)
 	return timer;
 }
 
-unsigned long kaslr_get_random_long(const char *purpose)
+unsigned long kaslr_get_random_seed(const char *purpose)
 {
 #ifdef CONFIG_X86_64
 	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
@@ -96,3 +96,51 @@  unsigned long kaslr_get_random_long(const char *purpose)
 
 	return random;
 }
+
+/*
+ * 64bit variant of Bob Jenkins' public domain PRNG
+ * 256 bits of internal state
+ */
+struct prng_state {
+	u64 a, b, c, d;
+};
+
+static struct prng_state state;
+static bool initialized;
+
+#define rot(x, k) (((x)<<(k))|((x)>>(64-(k))))
+static u64 prng_u64(struct prng_state *x)
+{
+	u64 e;
+
+	e = x->a - rot(x->b, 7);
+	x->a = x->b ^ rot(x->c, 13);
+	x->b = x->c + rot(x->d, 37);
+	x->c = x->d + e;
+	x->d = e + x->a;
+
+	return x->d;
+}
+
+static void prng_init(struct prng_state *state)
+{
+	int i;
+
+	state->a = kaslr_get_random_seed(NULL);
+	state->b = kaslr_get_random_seed(NULL);
+	state->c = kaslr_get_random_seed(NULL);
+	state->d = kaslr_get_random_seed(NULL);
+
+	for (i = 0; i < 30; ++i)
+		(void)prng_u64(state);
+
+	initialized = true;
+}
+
+unsigned long kaslr_get_prandom_long(void)
+{
+	if (!initialized)
+		prng_init(&state);
+
+	return prng_u64(&state);
+}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e7bb483557c9..c974dbab2293 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -722,7 +722,7 @@  void __init poking_init(void)
 	 */
 	poking_addr = TASK_UNMAPPED_BASE;
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-		poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+		poking_addr += (kaslr_get_random_seed("Poking") & PAGE_MASK) %
 			(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
 
 	if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index dc6182eecefa..b30bd1db7543 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -123,7 +123,7 @@  void __init kernel_randomize_memory(void)
 	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
 		remain_entropy -= get_padding(&kaslr_regions[i]);
 
-	prandom_seed_state(&rand_state, kaslr_get_random_long("Memory"));
+	prandom_seed_state(&rand_state, kaslr_get_random_seed("Memory"));
 
 	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++) {
 		unsigned long entropy;