diff mbox

[1/4] random: replace non-blocking pool with a Chacha20-based CRNG

Message ID 1462389951-29439-2-git-send-email-tytso@mit.edu (mailing list archive)
State RFC
Delegated to: Herbert Xu
Headers show

Commit Message

Theodore Ts'o May 4, 2016, 7:25 p.m. UTC
The CRNG is faster, and we don't pretend to track entropy usage in the
CRNG any more.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 crypto/chacha20_generic.c |  61 ----------
 drivers/char/random.c     | 283 +++++++++++++++++++++++++++++++++++-----------
 include/crypto/chacha20.h |   1 +
 lib/Makefile              |   2 +-
 lib/chacha20.c            |  79 +++++++++++++
 5 files changed, 295 insertions(+), 131 deletions(-)
 create mode 100644 lib/chacha20.c

Comments

Stephan Mueller May 4, 2016, 8:28 p.m. UTC | #1
Am Mittwoch, 4. Mai 2016, 15:25:48 schrieb Theodore Ts'o:

Hi Theodore,

> The CRNG is faster, and we don't pretend to track entropy usage in the
> CRNG any more.
> 
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
>  crypto/chacha20_generic.c |  61 ----------
>  drivers/char/random.c     | 283 +++++++++++++++++++++++++++++++++++-----------
>  include/crypto/chacha20.h |   1 +
>  lib/Makefile              |   2 +-
>  lib/chacha20.c            |  79 +++++++++++++
>  5 files changed, 295 insertions(+), 131 deletions(-)
>  create mode 100644 lib/chacha20.c
> 
> diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
> index da9c899..1cab831 100644
> --- a/crypto/chacha20_generic.c
> +++ b/crypto/chacha20_generic.c
> @@ -15,72 +15,11 @@
>  #include <linux/module.h>
>  #include <crypto/chacha20.h>
> 
> -static inline u32 rotl32(u32 v, u8 n)
> -{
> -	return (v << n) | (v >> (sizeof(v) * 8 - n));
> -}
> -
>  static inline u32 le32_to_cpuvp(const void *p)
>  {
>  	return le32_to_cpup(p);
>  }
> 
> -static void chacha20_block(u32 *state, void *stream)
> -{
> -	u32 x[16], *out = stream;
> -	int i;
> -
> -	for (i = 0; i < ARRAY_SIZE(x); i++)
> -		x[i] = state[i];
> -
> -	for (i = 0; i < 20; i += 2) {
> -		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
> -		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
> -		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
> -		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
> -
> -		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
> -		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
> -		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
> -		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
> -
> -		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
> -		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
> -		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
> -		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
> -
> -		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
> -		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
> -		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
> -		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
> -
> -		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
> -		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
> -		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
> -		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
> -
> -		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
> -		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
> -		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
> -		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
> -
> -		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
> -		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
> -		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
> -		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
> -
> -		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
> -		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
> -		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
> -		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
> -	}
> -
> -	for (i = 0; i < ARRAY_SIZE(x); i++)
> -		out[i] = cpu_to_le32(x[i] + state[i]);
> -
> -	state[12]++;
> -}
> -
>  static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
>  			     unsigned int bytes)
>  {
> diff --git a/drivers/char/random.c b/drivers/char/random.c
> index b583e53..91d5c2a 100644
> --- a/drivers/char/random.c
> +++ b/drivers/char/random.c
> @@ -260,6 +260,7 @@
>  #include <linux/irq.h>
>  #include <linux/syscalls.h>
>  #include <linux/completion.h>
> +#include <crypto/chacha20.h>
> 
>  #include <asm/processor.h>
>  #include <asm/uaccess.h>
> @@ -412,6 +413,18 @@ static struct fasync_struct *fasync;
>  static DEFINE_SPINLOCK(random_ready_list_lock);
>  static LIST_HEAD(random_ready_list);
> 
> +/*
> + * crng_init =  0 --> Uninitialized
> + *		2 --> Initialized
> + *		3 --> Initialized from input_pool
> + *
> + * crng_init is protected by primary_crng.lock, and only increases
> + * its value (from 0->1->2->3).
> + */
> +static int crng_init = 0;
> +#define crng_ready() (likely(crng_init >= 2))
> +static void process_random_ready_list(void);
> +
>  /**********************************************************************
>   *
>   * OS independent entropy store.   Here are the functions which handle
> @@ -441,10 +454,13 @@ struct entropy_store {
>  	__u8 last_data[EXTRACT_SIZE];
>  };
> 
> +static ssize_t extract_entropy(struct entropy_store *r, void *buf,
> +			       size_t nbytes, int min, int rsvd);
> +
> +static int crng_reseed(struct entropy_store *r);
>  static void push_to_pool(struct work_struct *work);
>  static __u32 input_pool_data[INPUT_POOL_WORDS];
>  static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
> -static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
> 
>  static struct entropy_store input_pool = {
>  	.poolinfo = &poolinfo_table[0],
> @@ -465,16 +481,6 @@ static struct entropy_store blocking_pool = {
>  					push_to_pool),
>  };
> 
> -static struct entropy_store nonblocking_pool = {
> -	.poolinfo = &poolinfo_table[1],
> -	.name = "nonblocking",
> -	.pull = &input_pool,
> -	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
> -	.pool = nonblocking_pool_data,
> -	.push_work = __WORK_INITIALIZER(nonblocking_pool.push_work,
> -					push_to_pool),
> -};
> -
>  static __u32 const twist_table[8] = {
>  	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
>  	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
> @@ -677,12 +683,6 @@ retry:
>  	if (!r->initialized && r->entropy_total > 128) {
>  		r->initialized = 1;
>  		r->entropy_total = 0;
> -		if (r == &nonblocking_pool) {
> -			prandom_reseed_late();
> -			process_random_ready_list();
> -			wake_up_all(&urandom_init_wait);
> -			pr_notice("random: %s pool is initialized\n", r->name);
> -		}
>  	}
> 
>  	trace_credit_entropy_bits(r->name, nbits,
> @@ -692,30 +692,27 @@ retry:
>  	if (r == &input_pool) {
>  		int entropy_bits = entropy_count >> ENTROPY_SHIFT;
> 
> +		if (crng_init < 3 && entropy_bits >= 128) {
> +			(void) crng_reseed(r);
> +			entropy_bits = r->entropy_count >> ENTROPY_SHIFT;
> +		}
> +
>  		/* should we wake readers? */
>  		if (entropy_bits >= random_read_wakeup_bits) {
>  			wake_up_interruptible(&random_read_wait);
>  			kill_fasync(&fasync, SIGIO, POLL_IN);
>  		}
>  		/* If the input pool is getting full, send some
> -		 * entropy to the two output pools, flipping back and
> -		 * forth between them, until the output pools are 75%
> -		 * full.
> +		 * entropy to the blocking pool until it is 75% full.
>  		 */
>  		if (entropy_bits > random_write_wakeup_bits &&
>  		    r->initialized &&
>  		    r->entropy_total >= 2*random_read_wakeup_bits) {
> -			static struct entropy_store *last = &blocking_pool;
>  			struct entropy_store *other = &blocking_pool;
> 
> -			if (last == &blocking_pool)
> -				other = &nonblocking_pool;
>  			if (other->entropy_count <=
> -			    3 * other->poolinfo->poolfracbits / 4)
> -				last = other;
> -			if (last->entropy_count <=
> -			    3 * last->poolinfo->poolfracbits / 4) {
> -				schedule_work(&last->push_work);
> +			    3 * other->poolinfo->poolfracbits / 4) {
> +				schedule_work(&other->push_work);
>  				r->entropy_total = 0;
>  			}
>  		}
> @@ -735,6 +732,156 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
> 
>  /*********************************************************************
>   *
> + * CRNG using CHACHA20
> + *
> + *********************************************************************/
> +
> +#define CRNG_RESEED_INTERVAL (300*HZ)
> +
> +struct crng_state {
> +	__u32		state[16];
> +	unsigned long	init_time;
> +	spinlock_t	lock;
> +};
> +
> +struct crng_state primary_crng = {
> +	.lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
> +};
> +static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
> +
> +static void _initialize_crng(struct crng_state *crng)
> +{
> +	int		i;
> +	unsigned long	rv;
> +
> +	memcpy(&crng->state[0], "expand 32-byte k", 16);
> +	for (i = 4; i < 16; i++) {
> +		if (!arch_get_random_seed_long(&rv) &&
> +		    !arch_get_random_long(&rv))
> +			rv = random_get_entropy();
> +		crng->state[i] ^= rv;
> +	}
> +	crng->init_time = jiffies - CRNG_RESEED_INTERVAL;
> +}
> +
> +static void initialize_crng(struct crng_state *crng)
> +{
> +	_initialize_crng(crng);
> +	spin_lock_init(&crng->lock);
> +}
> +
> +static int crng_fast_load(__u32 pool[4])
> +{
> +	int	i;
> +	__u32	*p;
> +
> +	if (!spin_trylock(&primary_crng.lock))
> +		return 0;
> +	if (crng_ready()) {
> +		spin_unlock(&primary_crng.lock);
> +		return 0;
> +	}
> +	p = &primary_crng.state[4];
> +	if (crng_init == 1)
> +		p += 4;
> +	for (i=0; i < 4; i++)
> +		*p ^= pool[i];
> +	if (++crng_init >= 2) {
> +		wake_up_interruptible(&crng_init_wait);
> +		pr_notice("random: fast init done\n");
> +	}
> +	spin_unlock(&primary_crng.lock);
> +	return 1;
> +}
> +
> +/* Returns 1 on success */
> +static int crng_reseed(struct entropy_store *r)
> +{
> +	unsigned long	flags;
> +	int		ret = 0;
> +	int		i, num, num_words;
> +	__u32		tmp[16];
> +
> +	spin_lock_irqsave(&primary_crng.lock, flags);
> +	num = extract_entropy(r, tmp, 32, 16, 0);
> +	if (num == 0)
> +		goto out;
> +	BUG_ON(num < 16 || num > 32);
> +	num_words = (num + 3) / 4;
> +	for (i = 0; i < num_words; i++)
> +		primary_crng.state[i+4] ^= tmp[i];
> +	primary_crng.init_time = jiffies;
> +	if (crng_init < 3) {
> +		crng_init = 3;
> +		process_random_ready_list();
> +		wake_up_interruptible(&crng_init_wait);
> +		pr_notice("random: crng init done\n");
> +	}
> +	ret = 1;
> +out:
> +	spin_unlock_irqrestore(&primary_crng.lock, flags);
> +	return ret;

Where did you add the memzero_explicit of tmp?

> +}
> +
> +static inline void crng_wait_ready(void)
> +{
> +	wait_event_interruptible(crng_init_wait, crng_ready());
> +}
> +
> +static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
> +{
> +	unsigned long v, flags;
> +	struct crng_state *crng = &primary_crng;
> +
> +	if (crng_init > 2 &&
> +	    time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
> +		crng_reseed(&input_pool);
> +	spin_lock_irqsave(&crng->lock, flags);
> +	if (arch_get_random_long(&v))
> +		crng->state[14] ^= v;
> +	chacha20_block(&crng->state[0], out);
> +	if (crng->state[12] == 0)
> +		crng->state[13]++;
> +	spin_unlock_irqrestore(&crng->lock, flags);
> +}
> +
> +static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
> +{
> +	ssize_t ret = 0, i;
> +	__u8 tmp[CHACHA20_BLOCK_SIZE];
> +	int large_request = (nbytes > 256);
> +
> +	while (nbytes) {
> +		if (large_request && need_resched()) {
> +			if (signal_pending(current)) {
> +				if (ret == 0)
> +					ret = -ERESTARTSYS;
> +				break;
> +			}
> +			schedule();
> +		}
> +
> +		extract_crng(tmp);
> +		i = min_t(int, nbytes, CHACHA20_BLOCK_SIZE);
> +		if (copy_to_user(buf, tmp, i)) {
> +			ret = -EFAULT;
> +			break;
> +		}
> +
> +		nbytes -= i;
> +		buf += i;
> +		ret += i;
> +	}
> +
> +	/* Wipe data just written to memory */
> +	memzero_explicit(tmp, sizeof(tmp));
> +
> +	return ret;
> +}
> +
> +
> +/*********************************************************************
> + *
>   * Entropy input management
>   *
>   *********************************************************************/
> @@ -749,12 +896,12 @@ struct timer_rand_state {
>  #define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
> 
>  /*
> - * Add device- or boot-specific data to the input and nonblocking
> - * pools to help initialize them to unique values.
> + * Add device- or boot-specific data to the input pool to help
> + * initialize it.
>   *
> - * None of this adds any entropy, it is meant to avoid the
> - * problem of the nonblocking pool having similar initial state
> - * across largely identical devices.
> + * None of this adds any entropy; it is meant to avoid the problem of
> + * the entropy pool having similar initial state across largely
> + * identical devices.
>   */
>  void add_device_randomness(const void *buf, unsigned int size)
>  {
> @@ -766,11 +913,6 @@ void add_device_randomness(const void *buf, unsigned int size)
>  	_mix_pool_bytes(&input_pool, buf, size);
>  	_mix_pool_bytes(&input_pool, &time, sizeof(time));
>  	spin_unlock_irqrestore(&input_pool.lock, flags);
> -
> -	spin_lock_irqsave(&nonblocking_pool.lock, flags);
> -	_mix_pool_bytes(&nonblocking_pool, buf, size);
> -	_mix_pool_bytes(&nonblocking_pool, &time, sizeof(time));
> -	spin_unlock_irqrestore(&nonblocking_pool.lock, flags);
>  }
>  EXPORT_SYMBOL(add_device_randomness);
> 
> @@ -801,7 +943,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
>  	sample.jiffies = jiffies;
>  	sample.cycles = random_get_entropy();
>  	sample.num = num;
> -	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
> +	r = &input_pool;
>  	mix_pool_bytes(r, &sample, sizeof(sample));
> 
>  	/*
> @@ -921,7 +1063,13 @@ void add_interrupt_randomness(int irq, int irq_flags)
>  	    !time_after(now, fast_pool->last + HZ))
>  		return;
> 
> -	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
> +	if (!crng_ready() && crng_fast_load(fast_pool->pool)) {
> +		fast_pool->count = 0;
> +		fast_pool->last = now;
> +		return;
> +	}
> +
> +	r = &input_pool;
>  	if (!spin_trylock(&r->lock))
>  		return;
> 
> @@ -964,9 +1112,6 @@ EXPORT_SYMBOL_GPL(add_disk_randomness);
>   *
>   *********************************************************************/
> 
> -static ssize_t extract_entropy(struct entropy_store *r, void *buf,
> -			       size_t nbytes, int min, int rsvd);
> -
>  /*
>   * This utility inline function is responsible for transferring entropy
>   * from the primary pool to the secondary extraction pool. We make
> @@ -1252,15 +1397,26 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
>   */
>  void get_random_bytes(void *buf, int nbytes)
>  {
> +	__u8 tmp[CHACHA20_BLOCK_SIZE];
> +
>  #if DEBUG_RANDOM_BOOT > 0
> -	if (unlikely(nonblocking_pool.initialized == 0))
> +	if (!crng_ready())
>  		printk(KERN_NOTICE "random: %pF get_random_bytes called "
> -		       "with %d bits of entropy available\n",
> -		       (void *) _RET_IP_,
> -		       nonblocking_pool.entropy_total);
> +		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
>  #endif
>  	trace_get_random_bytes(nbytes, _RET_IP_);
> -	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
> +
> +	while (nbytes >= CHACHA20_BLOCK_SIZE) {
> +		extract_crng(buf);
> +		buf += CHACHA20_BLOCK_SIZE;
> +		nbytes -= CHACHA20_BLOCK_SIZE;
> +	}
> +
> +	if (nbytes > 0) {
> +		extract_crng(tmp);
> +		memcpy(buf, tmp, nbytes);
> +		memzero_explicit(tmp, nbytes);
> +	}
>  }
>  EXPORT_SYMBOL(get_random_bytes);
> 
> @@ -1278,7 +1434,7 @@ int add_random_ready_callback(struct random_ready_callback *rdy)
>  	unsigned long flags;
>  	int err = -EALREADY;
> 
> -	if (likely(nonblocking_pool.initialized))
> +	if (crng_ready())
>  		return err;
> 
>  	owner = rdy->owner;
> @@ -1286,7 +1442,7 @@ int add_random_ready_callback(struct random_ready_callback *rdy)
>  		return -ENOENT;
> 
>  	spin_lock_irqsave(&random_ready_list_lock, flags);
> -	if (nonblocking_pool.initialized)
> +	if (crng_ready())
>  		goto out;
> 
>  	owner = NULL;
> @@ -1350,7 +1506,7 @@ void get_random_bytes_arch(void *buf, int nbytes)
>  	}
> 
>  	if (nbytes)
> -		extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
> +		get_random_bytes(p, nbytes);
>  }
>  EXPORT_SYMBOL(get_random_bytes_arch);
> 
> @@ -1395,7 +1551,7 @@ static int rand_initialize(void)
>  {
>  	init_std_data(&input_pool);
>  	init_std_data(&blocking_pool);
> -	init_std_data(&nonblocking_pool);
> +	_initialize_crng(&primary_crng);
>  	return 0;
>  }
>  early_initcall(rand_initialize);
> @@ -1459,16 +1615,10 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
>  {
>  	int ret;
> 
> -	if (unlikely(nonblocking_pool.initialized == 0))
> -		printk_once(KERN_NOTICE "random: %s urandom read "
> -			    "with %d bits of entropy available\n",
> -			    current->comm, nonblocking_pool.entropy_total);
> -
> +	crng_wait_ready();
>  	nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
> -	ret = extract_entropy_user(&nonblocking_pool, buf, nbytes);
> -
> -	trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool),
> -			   ENTROPY_BITS(&input_pool));
> +	ret = extract_crng_user(buf, nbytes);
> +	trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool));
>  	return ret;
>  }
> 
> @@ -1514,10 +1664,7 @@ static ssize_t random_write(struct file *file, const char __user *buffer,
>  {
>  	size_t ret;
> 
> -	ret = write_pool(&blocking_pool, buffer, count);
> -	if (ret)
> -		return ret;
> -	ret = write_pool(&nonblocking_pool, buffer, count);
> +	ret = write_pool(&input_pool, buffer, count);
>  	if (ret)
>  		return ret;
> 
> @@ -1568,7 +1715,6 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
>  		if (!capable(CAP_SYS_ADMIN))
>  			return -EPERM;
>  		input_pool.entropy_count = 0;
> -		nonblocking_pool.entropy_count = 0;
>  		blocking_pool.entropy_count = 0;
>  		return 0;
>  	default:
> @@ -1610,11 +1756,10 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
>  	if (flags & GRND_RANDOM)
>  		return _random_read(flags & GRND_NONBLOCK, buf, count);
> 
> -	if (unlikely(nonblocking_pool.initialized == 0)) {
> +	if (!crng_ready()) {
>  		if (flags & GRND_NONBLOCK)
>  			return -EAGAIN;
> -		wait_event_interruptible(urandom_init_wait,
> -					 nonblocking_pool.initialized);
> +		crng_wait_ready();
>  		if (signal_pending(current))
>  			return -ERESTARTSYS;
>  	}
> diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
> index 274bbae..20d20f68 100644
> --- a/include/crypto/chacha20.h
> +++ b/include/crypto/chacha20.h
> @@ -16,6 +16,7 @@ struct chacha20_ctx {
>  	u32 key[8];
>  };
> 
> +void chacha20_block(u32 *state, void *stream);
>  void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
>  int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
>  			   unsigned int keysize);
> diff --git a/lib/Makefile b/lib/Makefile
> index 7bd6fd4..9ba27cd 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -22,7 +22,7 @@ KCOV_INSTRUMENT_hweight.o := n
>  lib-y := ctype.o string.o vsprintf.o cmdline.o \
>  	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
>  	 idr.o int_sqrt.o extable.o \
> -	 sha1.o md5.o irq_regs.o argv_split.o \
> +	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
>  	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
>  	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
>  	 earlycpio.o seq_buf.o nmi_backtrace.o
> diff --git a/lib/chacha20.c b/lib/chacha20.c
> new file mode 100644
> index 0000000..250ceed
> --- /dev/null
> +++ b/lib/chacha20.c
> @@ -0,0 +1,79 @@
> +/*
> + * ChaCha20 256-bit cipher algorithm, RFC7539
> + *
> + * Copyright (C) 2015 Martin Willi
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <linux/bitops.h>
> +#include <linux/cryptohash.h>
> +#include <asm/unaligned.h>
> +#include <crypto/chacha20.h>
> +
> +static inline u32 rotl32(u32 v, u8 n)
> +{
> +	return (v << n) | (v >> (sizeof(v) * 8 - n));
> +}
> +
> +extern void chacha20_block(u32 *state, void *stream)
> +{
> +	u32 x[16], *out = stream;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(x); i++)
> +		x[i] = state[i];
> +
> +	for (i = 0; i < 20; i += 2) {
> +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
> +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
> +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
> +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
> +
> +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
> +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
> +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
> +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
> +
> +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
> +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
> +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
> +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
> +
> +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
> +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
> +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
> +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
> +
> +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
> +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
> +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
> +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
> +
> +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
> +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
> +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
> +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
> +
> +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
> +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
> +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
> +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
> +
> +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
> +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
> +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
> +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
> +	}
> +
> +	for (i = 0; i < ARRAY_SIZE(x); i++)
> +		out[i] = cpu_to_le32(x[i] + state[i]);
> +
> +	state[12]++;
> +}
> +EXPORT_SYMBOL(chacha20_block);
Theodore Ts'o May 5, 2016, 1:10 a.m. UTC | #2
On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote:
> > +out:
> > +	spin_unlock_irqrestore(&primary_crng.lock, flags);
> > +	return ret;
> 
> Where did you add the memzero_explicit of tmp?

Oops, sorry, somehow that change got lost in the patch updates.  Fixed now.

      	     	     	  	     - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o May 5, 2016, 12:49 p.m. UTC | #3
On Wed, May 04, 2016 at 09:10:07PM -0400, Theodore Ts'o wrote:
> On Wed, May 04, 2016 at 10:28:24PM +0200, Stephan Mueller wrote:
> > > +out:
> > > +	spin_unlock_irqrestore(&primary_crng.lock, flags);
> > > +	return ret;
> > 
> > Where did you add the memzero_explicit of tmp?
> 
> Oops, sorry, somehow that change got lost in the patch updates.  Fixed now.

Since that was the only change, instead of sending out the patch
series again, I've just updated it at:

git://git.kernel.org/pub/scm/linux/kernel/git/tytso/random.git dev


						- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index da9c899..1cab831 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -15,72 +15,11 @@ 
 #include <linux/module.h>
 #include <crypto/chacha20.h>
 
-static inline u32 rotl32(u32 v, u8 n)
-{
-	return (v << n) | (v >> (sizeof(v) * 8 - n));
-}
-
 static inline u32 le32_to_cpuvp(const void *p)
 {
 	return le32_to_cpup(p);
 }
 
-static void chacha20_block(u32 *state, void *stream)
-{
-	u32 x[16], *out = stream;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		x[i] = state[i];
-
-	for (i = 0; i < 20; i += 2) {
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
-
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
-
-		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
-		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
-		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
-		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
-
-		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
-		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
-		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
-		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
-
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
-
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
-
-		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
-		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
-		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
-		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
-
-		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
-		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
-		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
-		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(x); i++)
-		out[i] = cpu_to_le32(x[i] + state[i]);
-
-	state[12]++;
-}
-
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b583e53..91d5c2a 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -260,6 +260,7 @@ 
 #include <linux/irq.h>
 #include <linux/syscalls.h>
 #include <linux/completion.h>
+#include <crypto/chacha20.h>
 
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -412,6 +413,18 @@  static struct fasync_struct *fasync;
 static DEFINE_SPINLOCK(random_ready_list_lock);
 static LIST_HEAD(random_ready_list);
 
+/*
+ * crng_init =  0 --> Uninitialized
+ *		2 --> Initialized
+ *		3 --> Initialized from input_pool
+ *
+ * crng_init is protected by primary_crng.lock, and only increases
+ * its value (from 0->1->2->3).
+ */
+static int crng_init = 0;
+#define crng_ready() (likely(crng_init >= 2))
+static void process_random_ready_list(void);
+
 /**********************************************************************
  *
  * OS independent entropy store.   Here are the functions which handle
@@ -441,10 +454,13 @@  struct entropy_store {
 	__u8 last_data[EXTRACT_SIZE];
 };
 
+static ssize_t extract_entropy(struct entropy_store *r, void *buf,
+			       size_t nbytes, int min, int rsvd);
+
+static int crng_reseed(struct entropy_store *r);
 static void push_to_pool(struct work_struct *work);
 static __u32 input_pool_data[INPUT_POOL_WORDS];
 static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
-static __u32 nonblocking_pool_data[OUTPUT_POOL_WORDS];
 
 static struct entropy_store input_pool = {
 	.poolinfo = &poolinfo_table[0],
@@ -465,16 +481,6 @@  static struct entropy_store blocking_pool = {
 					push_to_pool),
 };
 
-static struct entropy_store nonblocking_pool = {
-	.poolinfo = &poolinfo_table[1],
-	.name = "nonblocking",
-	.pull = &input_pool,
-	.lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
-	.pool = nonblocking_pool_data,
-	.push_work = __WORK_INITIALIZER(nonblocking_pool.push_work,
-					push_to_pool),
-};
-
 static __u32 const twist_table[8] = {
 	0x00000000, 0x3b6e20c8, 0x76dc4190, 0x4db26158,
 	0xedb88320, 0xd6d6a3e8, 0x9b64c2b0, 0xa00ae278 };
@@ -677,12 +683,6 @@  retry:
 	if (!r->initialized && r->entropy_total > 128) {
 		r->initialized = 1;
 		r->entropy_total = 0;
-		if (r == &nonblocking_pool) {
-			prandom_reseed_late();
-			process_random_ready_list();
-			wake_up_all(&urandom_init_wait);
-			pr_notice("random: %s pool is initialized\n", r->name);
-		}
 	}
 
 	trace_credit_entropy_bits(r->name, nbits,
@@ -692,30 +692,27 @@  retry:
 	if (r == &input_pool) {
 		int entropy_bits = entropy_count >> ENTROPY_SHIFT;
 
+		if (crng_init < 3 && entropy_bits >= 128) {
+			(void) crng_reseed(r);
+			entropy_bits = r->entropy_count >> ENTROPY_SHIFT;
+		}
+
 		/* should we wake readers? */
 		if (entropy_bits >= random_read_wakeup_bits) {
 			wake_up_interruptible(&random_read_wait);
 			kill_fasync(&fasync, SIGIO, POLL_IN);
 		}
 		/* If the input pool is getting full, send some
-		 * entropy to the two output pools, flipping back and
-		 * forth between them, until the output pools are 75%
-		 * full.
+		 * entropy to the blocking pool until it is 75% full.
 		 */
 		if (entropy_bits > random_write_wakeup_bits &&
 		    r->initialized &&
 		    r->entropy_total >= 2*random_read_wakeup_bits) {
-			static struct entropy_store *last = &blocking_pool;
 			struct entropy_store *other = &blocking_pool;
 
-			if (last == &blocking_pool)
-				other = &nonblocking_pool;
 			if (other->entropy_count <=
-			    3 * other->poolinfo->poolfracbits / 4)
-				last = other;
-			if (last->entropy_count <=
-			    3 * last->poolinfo->poolfracbits / 4) {
-				schedule_work(&last->push_work);
+			    3 * other->poolinfo->poolfracbits / 4) {
+				schedule_work(&other->push_work);
 				r->entropy_total = 0;
 			}
 		}
@@ -735,6 +732,156 @@  static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)
 
 /*********************************************************************
  *
+ * CRNG using CHACHA20
+ *
+ *********************************************************************/
+
+#define CRNG_RESEED_INTERVAL (300*HZ)	/* max seed age, in jiffies, before extract_crng() reseeds */
+
+struct crng_state {
+	__u32		state[16];	/* ChaCha20 state handed to chacha20_block() */
+	unsigned long	init_time;	/* jiffies at the last reseed (backdated at init) */
+	spinlock_t	lock;		/* taken by fast load, reseed and extract paths */
+};
+
+struct crng_state primary_crng = {	/* the CRNG instance that extract_crng() draws from */
+	.lock = __SPIN_LOCK_UNLOCKED(primary_crng.lock),
+};
+static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);	/* woken when crng_init advances */
+/* Set the ChaCha20 constant in crng and stir early random data into words 4..15. */
+static void _initialize_crng(struct crng_state *crng)
+{
+	int		i;
+	unsigned long	rv;
+
+	memcpy(&crng->state[0], "expand 32-byte k", 16);	/* words 0..3 */
+	for (i = 4; i < 16; i++) {
+		if (!arch_get_random_seed_long(&rv) &&
+		    !arch_get_random_long(&rv))
+			rv = random_get_entropy();	/* fallback when both arch calls fail */
+		crng->state[i] ^= rv;	/* XORed in; only the low 32 bits of rv fit */
+	}
+	crng->init_time = jiffies - CRNG_RESEED_INTERVAL;	/* backdated: reseed is due at once */
+}
+/* Same as _initialize_crng(), plus run-time initialization of crng->lock. */
+static void initialize_crng(struct crng_state *crng)
+{
+	_initialize_crng(crng);
+	spin_lock_init(&crng->lock);
+}
+/* XOR one 4-word fast pool into primary_crng; returns 1 if it was consumed. */
+static int crng_fast_load(__u32 pool[4])
+{
+	int	i;
+	__u32	*p;
+
+	if (!spin_trylock(&primary_crng.lock))	/* never spin here; caller falls back */
+		return 0;
+	if (crng_ready()) {
+		spin_unlock(&primary_crng.lock);
+		return 0;
+	}
+	p = &primary_crng.state[4];
+	if (crng_init == 1)
+		p += 4;		/* second load lands in state[8..11] */
+	for (i = 0; i < 4; i++)
+		p[i] ^= pool[i];	/* one pool word per state word, not all into *p */
+	if (++crng_init >= 2) {
+		wake_up_interruptible(&crng_init_wait);
+		pr_notice("random: fast init done\n");
+	}
+	spin_unlock(&primary_crng.lock);
+	return 1;
+}
+
+/* Mix 16..32 bytes pulled from r into primary_crng; returns 1 on success, else 0. */
+static int crng_reseed(struct entropy_store *r)
+{
+	unsigned long	flags;
+	int		i, num, num_words, ret = 0;
+	__u32		tmp[16];
+
+	spin_lock_irqsave(&primary_crng.lock, flags);
+	num = extract_entropy(r, tmp, 32, 16, 0);
+	if (num == 0)
+		goto out;
+	BUG_ON(num < 16 || num > 32);
+	num_words = (num + 3) / 4;
+	for (i = 0; i < num_words; i++)
+		primary_crng.state[i+4] ^= tmp[i];
+	primary_crng.init_time = jiffies;
+	if (crng_init < 3) {
+		crng_init = 3;
+		process_random_ready_list();
+		wake_up_interruptible(&crng_init_wait);
+		pr_notice("random: crng init done\n");
+	}
+	ret = 1;
+out:
+	spin_unlock_irqrestore(&primary_crng.lock, flags);
+	memzero_explicit(tmp, sizeof(tmp));	/* seed material must not linger on the stack */
+	return ret;
+}
+/* Sleep until crng_ready(); the wait result is dropped, so callers check for signals. */
+static inline void crng_wait_ready(void)
+{
+	wait_event_interruptible(crng_init_wait, crng_ready());
+}
+/* Emit one ChaCha20 block from primary_crng into out, reseeding first if one is due. */
+static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+{
+	unsigned long v, flags;
+	struct crng_state *crng = &primary_crng;
+
+	if (crng_init > 2 &&
+	    time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
+		crng_reseed(&input_pool);	/* result ignored: on failure the old seed stays */
+	spin_lock_irqsave(&crng->lock, flags);
+	if (arch_get_random_long(&v))
+		crng->state[14] ^= v;	/* mixed in only when the arch call succeeds */
+	chacha20_block(&crng->state[0], out);	/* also increments state[12] */
+	if (crng->state[12] == 0)
+		crng->state[13]++;	/* state[12] wrapped: carry into state[13] */
+	spin_unlock_irqrestore(&crng->lock, flags);
+}
+/* Copy nbytes of CRNG output to buf; returns the count, -EFAULT or -ERESTARTSYS. */
+static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
+{
+	ssize_t ret = 0, i;
+	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	int large_request = (nbytes > 256);
+
+	while (nbytes) {
+		if (large_request && need_resched()) {
+			if (signal_pending(current)) {
+				if (ret == 0)
+					ret = -ERESTARTSYS;
+				break;
+			}
+			schedule();
+		}
+
+		extract_crng(tmp);
+		i = min_t(size_t, nbytes, CHACHA20_BLOCK_SIZE);	/* size_t: no truncation of nbytes */
+		if (copy_to_user(buf, tmp, i)) {
+			ret = -EFAULT;
+			break;
+		}
+
+		nbytes -= i;
+		buf += i;
+		ret += i;
+	}
+
+	/* Wipe data just written to memory */
+	memzero_explicit(tmp, sizeof(tmp));
+
+	return ret;
+}
+
+
+/*********************************************************************
+ *
  * Entropy input management
  *
  *********************************************************************/
@@ -749,12 +896,12 @@  struct timer_rand_state {
 #define INIT_TIMER_RAND_STATE { INITIAL_JIFFIES, };
 
 /*
- * Add device- or boot-specific data to the input and nonblocking
- * pools to help initialize them to unique values.
+ * Add device- or boot-specific data to the input pool to help
+ * initialize it.
  *
- * None of this adds any entropy, it is meant to avoid the
- * problem of the nonblocking pool having similar initial state
- * across largely identical devices.
+ * None of this adds any entropy; it is meant to avoid the problem of
+ * the entropy pool having similar initial state across largely
+ * identical devices.
  */
 void add_device_randomness(const void *buf, unsigned int size)
 {
@@ -766,11 +913,6 @@  void add_device_randomness(const void *buf, unsigned int size)
 	_mix_pool_bytes(&input_pool, buf, size);
 	_mix_pool_bytes(&input_pool, &time, sizeof(time));
 	spin_unlock_irqrestore(&input_pool.lock, flags);
-
-	spin_lock_irqsave(&nonblocking_pool.lock, flags);
-	_mix_pool_bytes(&nonblocking_pool, buf, size);
-	_mix_pool_bytes(&nonblocking_pool, &time, sizeof(time));
-	spin_unlock_irqrestore(&nonblocking_pool.lock, flags);
 }
 EXPORT_SYMBOL(add_device_randomness);
 
@@ -801,7 +943,7 @@  static void add_timer_randomness(struct timer_rand_state *state, unsigned num)
 	sample.jiffies = jiffies;
 	sample.cycles = random_get_entropy();
 	sample.num = num;
-	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
+	r = &input_pool;
 	mix_pool_bytes(r, &sample, sizeof(sample));
 
 	/*
@@ -921,7 +1063,13 @@  void add_interrupt_randomness(int irq, int irq_flags)
 	    !time_after(now, fast_pool->last + HZ))
 		return;
 
-	r = nonblocking_pool.initialized ? &input_pool : &nonblocking_pool;
+	if (!crng_ready() && crng_fast_load(fast_pool->pool)) {
+		fast_pool->count = 0;
+		fast_pool->last = now;
+		return;
+	}
+
+	r = &input_pool;
 	if (!spin_trylock(&r->lock))
 		return;
 
@@ -964,9 +1112,6 @@  EXPORT_SYMBOL_GPL(add_disk_randomness);
  *
  *********************************************************************/
 
-static ssize_t extract_entropy(struct entropy_store *r, void *buf,
-			       size_t nbytes, int min, int rsvd);
-
 /*
  * This utility inline function is responsible for transferring entropy
  * from the primary pool to the secondary extraction pool. We make
@@ -1252,15 +1397,26 @@  static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
  */
 void get_random_bytes(void *buf, int nbytes)
 {
+	__u8 tmp[CHACHA20_BLOCK_SIZE];
+
 #if DEBUG_RANDOM_BOOT > 0
-	if (unlikely(nonblocking_pool.initialized == 0))
+	if (!crng_ready())
 		printk(KERN_NOTICE "random: %pF get_random_bytes called "
-		       "with %d bits of entropy available\n",
-		       (void *) _RET_IP_,
-		       nonblocking_pool.entropy_total);
+		       "with crng_init = %d\n", (void *) _RET_IP_, crng_init);
 #endif
 	trace_get_random_bytes(nbytes, _RET_IP_);
-	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
+
+	while (nbytes >= CHACHA20_BLOCK_SIZE) {
+		extract_crng(buf);	/* whole blocks go straight into the caller's buffer */
+		buf += CHACHA20_BLOCK_SIZE;
+		nbytes -= CHACHA20_BLOCK_SIZE;
+	}
+
+	if (nbytes > 0) {
+		extract_crng(tmp);	/* partial tail goes through a bounce buffer */
+		memcpy(buf, tmp, nbytes);
+		memzero_explicit(tmp, sizeof(tmp));	/* wipe the unused keystream as well */
+	}
 }
 EXPORT_SYMBOL(get_random_bytes);
 
@@ -1278,7 +1434,7 @@  int add_random_ready_callback(struct random_ready_callback *rdy)
 	unsigned long flags;
 	int err = -EALREADY;
 
-	if (likely(nonblocking_pool.initialized))
+	if (crng_ready())
 		return err;
 
 	owner = rdy->owner;
@@ -1286,7 +1442,7 @@  int add_random_ready_callback(struct random_ready_callback *rdy)
 		return -ENOENT;
 
 	spin_lock_irqsave(&random_ready_list_lock, flags);
-	if (nonblocking_pool.initialized)
+	if (crng_ready())
 		goto out;
 
 	owner = NULL;
@@ -1350,7 +1506,7 @@  void get_random_bytes_arch(void *buf, int nbytes)
 	}
 
 	if (nbytes)
-		extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
+		get_random_bytes(p, nbytes);
 }
 EXPORT_SYMBOL(get_random_bytes_arch);
 
@@ -1395,7 +1551,7 @@  static int rand_initialize(void)
 {
 	init_std_data(&input_pool);
 	init_std_data(&blocking_pool);
-	init_std_data(&nonblocking_pool);
+	_initialize_crng(&primary_crng);
 	return 0;
 }
 early_initcall(rand_initialize);
@@ -1459,16 +1615,10 @@  urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	int ret;
 
-	if (unlikely(nonblocking_pool.initialized == 0))
-		printk_once(KERN_NOTICE "random: %s urandom read "
-			    "with %d bits of entropy available\n",
-			    current->comm, nonblocking_pool.entropy_total);
-
+	crng_wait_ready();
 	nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3));
-	ret = extract_entropy_user(&nonblocking_pool, buf, nbytes);
-
-	trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool),
-			   ENTROPY_BITS(&input_pool));
+	ret = extract_crng_user(buf, nbytes);
+	trace_urandom_read(8 * nbytes, 0, ENTROPY_BITS(&input_pool));
 	return ret;
 }
 
@@ -1514,10 +1664,7 @@  static ssize_t random_write(struct file *file, const char __user *buffer,
 {
 	size_t ret;
 
-	ret = write_pool(&blocking_pool, buffer, count);
-	if (ret)
-		return ret;
-	ret = write_pool(&nonblocking_pool, buffer, count);
+	ret = write_pool(&input_pool, buffer, count);
 	if (ret)
 		return ret;
 
@@ -1568,7 +1715,6 @@  static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 		if (!capable(CAP_SYS_ADMIN))
 			return -EPERM;
 		input_pool.entropy_count = 0;
-		nonblocking_pool.entropy_count = 0;
 		blocking_pool.entropy_count = 0;
 		return 0;
 	default:
@@ -1610,11 +1756,10 @@  SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
 	if (flags & GRND_RANDOM)
 		return _random_read(flags & GRND_NONBLOCK, buf, count);
 
-	if (unlikely(nonblocking_pool.initialized == 0)) {
+	if (!crng_ready()) {
 		if (flags & GRND_NONBLOCK)
 			return -EAGAIN;
-		wait_event_interruptible(urandom_init_wait,
-					 nonblocking_pool.initialized);
+		crng_wait_ready();
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 	}
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index 274bbae..20d20f68 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -16,6 +16,7 @@  struct chacha20_ctx {
 	u32 key[8];
 };
 
+void chacha20_block(u32 *state, void *stream);
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
 int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keysize);
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd4..9ba27cd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@  KCOV_INSTRUMENT_hweight.o := n
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
 	 idr.o int_sqrt.o extable.o \
-	 sha1.o md5.o irq_regs.o argv_split.o \
+	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 	 proportions.o flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o nmi_backtrace.o
diff --git a/lib/chacha20.c b/lib/chacha20.c
new file mode 100644
index 0000000..250ceed
--- /dev/null
+++ b/lib/chacha20.c
@@ -0,0 +1,79 @@ 
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/bitops.h>
+#include <linux/cryptohash.h>
+#include <asm/unaligned.h>
+#include <crypto/chacha20.h>
+
+static inline u32 rotl32(u32 v, u8 n)	/* rotate v left by n bits */
+{
+	return (v << n) | (v >> (sizeof(v) * 8 - n));	/* n == 0 would shift right by 32 */
+}
+/* Write one ChaCha20 keystream block for state to stream; increments state[12]. */
+void chacha20_block(u32 *state, void *stream)
+{
+	u32 x[16], *out = stream;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		x[i] = state[i];
+
+	for (i = 0; i < 20; i += 2) {	/* 20 rounds: column round + diagonal round per pass */
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
+
+		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
+		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
+		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
+		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
+
+		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
+		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
+		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
+		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
+		/* diagonal round */
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
+
+		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
+		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
+		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
+		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
+
+		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
+		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
+		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
+		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(x); i++)
+		out[i] = cpu_to_le32(x[i] + state[i]);	/* add input state, store little-endian */
+
+	state[12]++;	/* advance word 12 so the next call yields a new block */
+}
+EXPORT_SYMBOL(chacha20_block);