[1/2] hwrng: msm: add a spinlock and support for blocking reads

Message ID 1529594276-12210-1-git-send-email-timur@codeaurora.org (mailing list archive)
State Not Applicable, archived
Delegated to: Andy Gross

Commit Message

Timur Tabi June 21, 2018, 3:17 p.m. UTC
The hwrng.read callback includes a boolean parameter called 'wait'
which indicates whether the function should block and wait for
more data.

When 'wait' is true, the driver spins until the DATA_AVAIL bit is set
or until a reasonable timeout expires.  The timeout can occur under
heavy read load on the PRNG.

The same code also needs a spinlock to protect against race conditions.

If multiple cores hammer on the PRNG, a race condition can occur
between reading the status register and reading the data register.
Add a spinlock to protect against that:

1. Core 1 reads the status register, which shows data is available.
2. Core 2 also reads the status register, with the same result.
3. Core 2 reads the data register, depleting all entropy.
4. Core 1 reads the data register, which returns 0.

Signed-off-by: Timur Tabi <timur@codeaurora.org>
---
 drivers/char/hw_random/msm-rng.c | 57 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 7 deletions(-)
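
[Editor's note: a minimal sketch of the unlocked check-then-read pattern
behind the sequence above.  The helper name is hypothetical; 'struct
msm_rng' and the register macros are the ones defined in msm-rng.c in
the patch below.]

	/*
	 * Racy: the status check and the data read are two separate MMIO
	 * accesses, so another core can drain the FIFO between them.
	 */
	static u32 msm_rng_read_word_unlocked(struct msm_rng *rng)
	{
		u32 val = readl_relaxed(rng->base + PRNG_STATUS);

		if (!(val & PRNG_STATUS_DATA_AVAIL))
			return 0;

		/* Steps 2-3 can happen here: another core reads PRNG_DATA_OUT. */
		return readl_relaxed(rng->base + PRNG_DATA_OUT);
	}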

Comments

Vinod Koul June 22, 2018, 4:17 a.m. UTC | #1
On 21-06-18, 10:17, Timur Tabi wrote:
> The hwrng.read callback includes a boolean parameter called 'wait'
> which indicates whether the function should block and wait for
> more data.
> 
> When 'wait' is true, the driver spins until the DATA_AVAIL bit is set
> or until a reasonable timeout expires.  The timeout can occur under
> heavy read load on the PRNG.
> 
> The same code also needs a spinlock to protect against race conditions.
> 
> If multiple cores hammer on the PRNG, a race condition can occur
> between reading the status register and reading the data register.
> Add a spinlock to protect against that:
> 
> 1. Core 1 reads the status register, which shows data is available.
> 2. Core 2 also reads the status register, with the same result.
> 3. Core 2 reads the data register, depleting all entropy.
> 4. Core 1 reads the data register, which returns 0.
> 
> Signed-off-by: Timur Tabi <timur@codeaurora.org>
> ---
>  drivers/char/hw_random/msm-rng.c | 57 +++++++++++++++++++++++++++++++++++-----
>  1 file changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
> index 841fee845ec9..44580588b938 100644
> --- a/drivers/char/hw_random/msm-rng.c
> +++ b/drivers/char/hw_random/msm-rng.c
> @@ -15,9 +15,11 @@
>  #include <linux/err.h>
>  #include <linux/hw_random.h>
>  #include <linux/io.h>
> +#include <linux/iopoll.h>
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/platform_device.h>
> +#include <linux/spinlock.h>
>  
>  /* Device specific register offsets */
>  #define PRNG_DATA_OUT		0x0000
> @@ -35,10 +37,22 @@
>  #define MAX_HW_FIFO_SIZE	(MAX_HW_FIFO_DEPTH * 4)
>  #define WORD_SZ			4
>  
> +/*
> + * Normally, this would be the maximum time it takes to refill the FIFO
> + * after a read.  Under heavy load, tests show that this delay is either
> + * below 50us or above 2200us.  The higher value is probably what happens
> + * when entropy is completely depleted.
> + *
> + * Since we don't want to wait 2ms in a spinlock, set the timeout to the
> + * lower value.  In extreme situations, that timeout can extend to 100us.
> + */
> +#define TIMEOUT		50
> +
>  struct msm_rng {
>  	void __iomem *base;
>  	struct clk *clk;
>  	struct hwrng hwrng;
> +	spinlock_t lock;
>  };
>  
>  #define to_msm_rng(p)	container_of(p, struct msm_rng, hwrng)
> @@ -96,11 +110,39 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
>  
>  	/* read random data from hardware */
>  	do {
> -		val = readl_relaxed(rng->base + PRNG_STATUS);
> -		if (!(val & PRNG_STATUS_DATA_AVAIL))
> -			break;
> +		spin_lock(&rng->lock);
> +
> +		/*
> +		 * First check the status bit.  If 'wait' is true, then wait
> +		 * up to TIMEOUT microseconds for data to be available.
> +		 */
> +		if (wait) {
> +			int ret;
> +
> +			ret = readl_poll_timeout_atomic(rng->base + PRNG_STATUS,
> +				val, val & PRNG_STATUS_DATA_AVAIL, 0, TIMEOUT);
> +			if (ret) {
> +				/* Timed out */
> +				spin_unlock(&rng->lock);
> +				break;
> +			}
> +		} else {
> +			val = readl_relaxed(rng->base + PRNG_STATUS);
> +			if (!(val & PRNG_STATUS_DATA_AVAIL)) {
> +				spin_unlock(&rng->lock);
> +				break;
> +			}
> +		}
>  
>  		val = readl_relaxed(rng->base + PRNG_DATA_OUT);
> +		spin_unlock(&rng->lock);
> +
> +		/*
> +		 * Zero is technically a valid random number, but it's also
> +		 * the value returned if the PRNG is not enabled properly.
> +		 * To avoid accidentally returning all zeros, treat it as
> +		 * invalid and just return what we've already read.
> +		 */
>  		if (!val)
>  			break;
>  
> @@ -148,10 +190,11 @@ static int msm_rng_probe(struct platform_device *pdev)
>  	if (IS_ERR(rng->clk))
>  		return PTR_ERR(rng->clk);
>  
> -	rng->hwrng.name = KBUILD_MODNAME,
> -	rng->hwrng.init = msm_rng_init,
> -	rng->hwrng.cleanup = msm_rng_cleanup,
> -	rng->hwrng.read = msm_rng_read,
> +	rng->hwrng.name = KBUILD_MODNAME;
> +	rng->hwrng.init = msm_rng_init;
> +	rng->hwrng.cleanup = msm_rng_cleanup;
> +	rng->hwrng.read = msm_rng_read;

this should be a separate patch
Timur Tabi June 22, 2018, 4:18 a.m. UTC | #2
On 6/21/18 11:17 PM, Vinod wrote:
> this should be a separate patch

What exactly should be a separate patch?  This part?

-	rng->hwrng.name = KBUILD_MODNAME,
-	rng->hwrng.init = msm_rng_init,
-	rng->hwrng.cleanup = msm_rng_cleanup,
-	rng->hwrng.read = msm_rng_read,
+	rng->hwrng.name = KBUILD_MODNAME;
+	rng->hwrng.init = msm_rng_init;
+	rng->hwrng.cleanup = msm_rng_cleanup;
+	rng->hwrng.read = msm_rng_read;
Vinod Koul June 22, 2018, 4:24 a.m. UTC | #3
On 21-06-18, 23:18, Timur Tabi wrote:
> On 6/21/18 11:17 PM, Vinod wrote:
> > this should be a separate patch
> 
> What exactly should be a separate patch?  This part?
> 
> -	rng->hwrng.name = KBUILD_MODNAME,
> -	rng->hwrng.init = msm_rng_init,
> -	rng->hwrng.cleanup = msm_rng_cleanup,
> -	rng->hwrng.read = msm_rng_read,
> +	rng->hwrng.name = KBUILD_MODNAME;
> +	rng->hwrng.init = msm_rng_init;
> +	rng->hwrng.cleanup = msm_rng_cleanup;
> +	rng->hwrng.read = msm_rng_read;

yes
Timur Tabi June 22, 2018, 4:28 a.m. UTC | #4
On 6/21/18 11:24 PM, Vinod wrote:
> On 21-06-18, 23:18, Timur Tabi wrote:
>> On 6/21/18 11:17 PM, Vinod wrote:
>>> this should be a separate patch
>> What exactly should be a separate patch?  This part?
>>
>> -	rng->hwrng.name = KBUILD_MODNAME,
>> -	rng->hwrng.init = msm_rng_init,
>> -	rng->hwrng.cleanup = msm_rng_cleanup,
>> -	rng->hwrng.read = msm_rng_read,
>> +	rng->hwrng.name = KBUILD_MODNAME;
>> +	rng->hwrng.init = msm_rng_init;
>> +	rng->hwrng.cleanup = msm_rng_cleanup;
>> +	rng->hwrng.read = msm_rng_read;
> yes

I consider this to be a minor clean-up that's not worthy of its own 
patch, but I can make it its own patch if you really want.
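
[Editor's note: for anyone wondering why the commas compiled at all:
each group of assignments parses as a single expression statement
chained by the comma operator, so the original code is legal C, just
accidental.  A tiny illustration, not from the thread:]

	int a, b;

	a = 1,	/* comma operator: evaluate the left expression, then... */
	b = 2;	/* ...the right; one statement, same effect as a = 1; b = 2; */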
Stephen Boyd June 22, 2018, 5:36 a.m. UTC | #5
Quoting Timur Tabi (2018-06-21 08:17:55)
> @@ -96,11 +110,39 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
>  
>         /* read random data from hardware */
>         do {
> -               val = readl_relaxed(rng->base + PRNG_STATUS);
> -               if (!(val & PRNG_STATUS_DATA_AVAIL))
> -                       break;
> +               spin_lock(&rng->lock);
> +
> +               /*
> +                * First check the status bit.  If 'wait' is true, then wait
> +                * up to TIMEOUT microseconds for data to be available.
> +                */
> +               if (wait) {
> +                       int ret;

Please don't do local variables like this. Put them at the top of the
function.

> +
> +                       ret = readl_poll_timeout_atomic(rng->base + PRNG_STATUS,
> +                               val, val & PRNG_STATUS_DATA_AVAIL, 0, TIMEOUT);
> +                       if (ret) {
> +                               /* Timed out */
> +                               spin_unlock(&rng->lock);
> +                               break;
> +                       }
> +               } else {
> +                       val = readl_relaxed(rng->base + PRNG_STATUS);
> +                       if (!(val & PRNG_STATUS_DATA_AVAIL)) {
> +                               spin_unlock(&rng->lock);
> +                               break;
> +                       }
> +               }
>  
>                 val = readl_relaxed(rng->base + PRNG_DATA_OUT);
> +               spin_unlock(&rng->lock);

Maybe this should be written as:

		spin_lock()
		if (wait) {
			has_data = readl_poll_timeout_atomic(...) == 0;
		} else {
			val = readl_relaxed(rng->base + PRNG_STATUS);
			has_data = val & PRNG_STATUS_DATA_AVAIL;
		}

		if (has_data)
			val = readl_relaxed(rng->base + PRNG_DATA_OUT);
		spin_unlock();

		if (!has_data)
			break;

> +
> +               /*
> +                * Zero is technically a valid random number, but it's also
> +                * the value returned if the PRNG is not enabled properly.
> +                * To avoid accidentally returning all zeros, treat it as
> +                * invalid and just return what we've already read.
> +                */
>                 if (!val)
>                         break;

Is this a related change? Looks like a nice comment that isn't related
to locking.
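
[Editor's note: a fleshed-out version of the loop body Stephen sketches
above, using the TIMEOUT constant and register offsets from the patch
and assuming a 'bool has_data' declared at the top of msm_rng_read().
An illustration of the suggestion, not code from the thread:]

	spin_lock(&rng->lock);
	if (wait)
		has_data = readl_poll_timeout_atomic(rng->base + PRNG_STATUS,
				val, val & PRNG_STATUS_DATA_AVAIL,
				0, TIMEOUT) == 0;
	else
		has_data = readl_relaxed(rng->base + PRNG_STATUS) &
			   PRNG_STATUS_DATA_AVAIL;

	/* The data read stays under the same lock as the status check. */
	if (has_data)
		val = readl_relaxed(rng->base + PRNG_DATA_OUT);
	spin_unlock(&rng->lock);

	if (!has_data)
		break;

The readability win is the single unlock site, compared with the three
separate unlock paths in the posted patch.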

Timur Tabi June 22, 2018, 1:11 p.m. UTC | #6
On 6/22/18 12:36 AM, Stephen Boyd wrote:
> Quoting Timur Tabi (2018-06-21 08:17:55)
>> @@ -96,11 +110,39 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
>>   
>>          /* read random data from hardware */
>>          do {
>> -               val = readl_relaxed(rng->base + PRNG_STATUS);
>> -               if (!(val & PRNG_STATUS_DATA_AVAIL))
>> -                       break;
>> +               spin_lock(&rng->lock);
>> +
>> +               /*
>> +                * First check the status bit.  If 'wait' is true, then wait
>> +                * up to TIMEOUT microseconds for data to be available.
>> +                */
>> +               if (wait) {
>> +                       int ret;
> Please don't do local variables like this. Put them at the top of the
> function.

Ok.

> 
>> +
>> +                       ret = readl_poll_timeout_atomic(rng->base + PRNG_STATUS,
>> +                               val, val & PRNG_STATUS_DATA_AVAIL, 0, TIMEOUT);
>> +                       if (ret) {
>> +                               /* Timed out */
>> +                               spin_unlock(&rng->lock);
>> +                               break;
>> +                       }
>> +               } else {
>> +                       val = readl_relaxed(rng->base + PRNG_STATUS);
>> +                       if (!(val & PRNG_STATUS_DATA_AVAIL)) {
>> +                               spin_unlock(&rng->lock);
>> +                               break;
>> +                       }
>> +               }
>>   
>>                  val = readl_relaxed(rng->base + PRNG_DATA_OUT);
>> +               spin_unlock(&rng->lock);
> Maybe this should be written as:
> 
> 		spin_lock()
> 		if (wait) {
> 			has_data = readl_poll_timeout_atomic(...) == 0;
> 		} else {
> 			val = readl_relaxed(rng->base + PRNG_STATUS);
> 			has_data = val & PRNG_STATUS_DATA_AVAIL;
> 		}
> 
> 		if (has_data)
> 			val = readl_relaxed(rng->base + PRNG_DATA_OUT);
> 		spin_unlock();
> 
> 		if (!has_data)
> 			break;

That would work, but I don't really see this as better, just different.

>> +               /*
>> +                * Zero is technically a valid random number, but it's also
>> +                * the value returned if the PRNG is not enabled properly.
>> +                * To avoid accidentally returning all zeros, treat it as
>> +                * invalid and just return what we've already read.
>> +                */
>>                  if (!val)
>>                          break;
> Is this a related change? Looks like a nice comment that isn't related
> to locking.

It's slightly related in that the locking is needed to reduce the number 
of times we read zero from the DATA_OUT register.
Stanimir Varbanov June 22, 2018, 3:38 p.m. UTC | #7
Hi,

On 06/21/2018 06:17 PM, Timur Tabi wrote:
> The hwrng.read callback includes a boolean parameter called 'wait'
> which indicates whether the function should block and wait for
> more data.
> 
> When 'wait' is true, the driver spins until the DATA_AVAIL bit is set
> or until a reasonable timeout expires.  The timeout can occur under
> heavy read load on the PRNG.
> 
> The same code also needs a spinlock to protect against race conditions.
> 
> If multiple cores hammer on the PRNG, a race condition can occur
> between reading the status register and reading the data register.
> Add a spinlock to protect against that.

Before entering the read function we already hold a mutex that
serializes data reads, so I cannot see how the sequence below could
happen.  Can you explain how to reproduce this race?

> 
> 1. Core 1 reads the status register, which shows data is available.
> 2. Core 2 also reads the status register, with the same result.
> 3. Core 2 reads the data register, depleting all entropy.
> 4. Core 1 reads the data register, which returns 0.
> 
> Signed-off-by: Timur Tabi <timur@codeaurora.org>
> ---
>  drivers/char/hw_random/msm-rng.c | 57 +++++++++++++++++++++++++++++++++++-----
>  1 file changed, 50 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
> index 841fee845ec9..44580588b938 100644
> --- a/drivers/char/hw_random/msm-rng.c
> +++ b/drivers/char/hw_random/msm-rng.c
> @@ -15,9 +15,11 @@
Timur Tabi June 22, 2018, 3:41 p.m. UTC | #8
On 6/22/18 10:38 AM, Stanimir Varbanov wrote:
> Before entering into the read function we already hold a mutex which
> serializes data reading so I cannot imagine how below sequence could
> happen. Can you explain how to reproduce this race?
> 
>> 1. Core 1 reads the status register, which shows data is available.
>> 2. Core 2 also reads the status register, with the same result.
>> 3. Core 2 reads the data register, depleting all entropy.
>> 4. Core 1 reads the data register, which returns 0.

I have a test which spawns 100 copies of rngtest on a 48-core machine. 
Without the spinlock, the driver returns no data much more often.

If there really is a mutex that serializes data reads across all cores, 
then I don't have an explanation.
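
[Editor's note: the mutex Stanimir refers to lives in the hwrng core,
which takes a single lock around every call into a driver's read hook.
A minimal sketch of that kind of serialization, with illustrative names
rather than the actual core.c code:]

	/* All readers funnel through one mutex before touching the driver. */
	static DEFINE_MUTEX(reading_mutex);

	static ssize_t serialized_read(struct hwrng *rng, void *buf,
				       size_t max, bool wait)
	{
		ssize_t n;

		mutex_lock(&reading_mutex);
		n = rng->read(rng, buf, max, wait);	/* e.g. msm_rng_read() */
		mutex_unlock(&reading_mutex);

		return n;
	}

If two cores really were inside msm_rng_read() at once, something would
have to be bypassing a path like this.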
Stephen Boyd June 22, 2018, 5:51 p.m. UTC | #9
Quoting Timur Tabi (2018-06-22 08:41:11)
> On 6/22/18 10:38 AM, Stanimir Varbanov wrote:
> > Before entering into the read function we already hold a mutex which
> > serializes data reading so I cannot imagine how below sequence could
> > happen. Can you explain how to reproduce this race?
> > 
> >> 1. Core 1 reads the status register, which shows data is available.
> >> 2. Core 2 also reads the status register, with the same result.
> >> 3. Core 2 reads the data register, depleting all entropy.
> >> 4. Core 1 reads the data register, which returns 0.
> 
> I have a test which spawns 100 copies of rngtest on a 48-core machine. 
> Without the spinlock, the driver returns no data much more often.
> 
> If there really is a mutex that serializes data reads across all cores, 
> then I don't have an explanation.
> 

Perhaps it's because you implemented the 'wait' functionality in this
driver? Before the patch there wasn't any sort of wait check so we would
bail out if there wasn't any data even if the caller requested that we
wait for randomness to be available.

Timur Tabi June 22, 2018, 6:03 p.m. UTC | #10
On 6/22/18 12:51 PM, Stephen Boyd wrote:
> Perhaps it's because you implemented the 'wait' functionality in this
> driver? Before the patch there wasn't any sort of wait check so we would
> bail out if there wasn't any data even if the caller requested that we
> wait for randomness to be available.

No, my tests showed the race condition (or at least something that looks 
like a race condition) even without the 'wait' feature.  I added support 
for 'wait' only recently, but I've been working with the crypto people 
for a month on everything else.
Timur Tabi June 22, 2018, 9:17 p.m. UTC | #11
On 06/22/2018 01:03 PM, Timur Tabi wrote:
> 
>> Perhaps it's because you implemented the 'wait' functionality in this
>> driver? Before the patch there wasn't any sort of wait check so we would
>> bail out if there wasn't any data even if the caller requested that we
>> wait for randomness to be available.
> 
> No, my tests showed the race condition (or at least something that looks 
> like a race condition) even without the 'wait' feature.  I added support 
> for 'wait' only recently, but I've been working with the crypto people 
> for a month on everything else.

Looks like I was wrong.

I created some new tests to reproduce the problem, but I can't reproduce 
it any more.  I can only assume that what I saw as a race condition back 
then was something else entirely.

So all of the spinlock code needs to go.  It looks like at this point, 
if Vinod can add support for QCOM8160 to his patches, the only thing I 
can contribute is support for 'wait'.

Patch

diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
index 841fee845ec9..44580588b938 100644
--- a/drivers/char/hw_random/msm-rng.c
+++ b/drivers/char/hw_random/msm-rng.c
@@ -15,9 +15,11 @@ 
 #include <linux/err.h>
 #include <linux/hw_random.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/spinlock.h>
 
 /* Device specific register offsets */
 #define PRNG_DATA_OUT		0x0000
@@ -35,10 +37,22 @@ 
 #define MAX_HW_FIFO_SIZE	(MAX_HW_FIFO_DEPTH * 4)
 #define WORD_SZ			4
 
+/*
+ * Normally, this would be the maximum time it takes to refill the FIFO
+ * after a read.  Under heavy load, tests show that this delay is either
+ * below 50us or above 2200us.  The higher value is probably what happens
+ * when entropy is completely depleted.
+ *
+ * Since we don't want to wait 2ms in a spinlock, set the timeout to the
+ * lower value.  In extreme situations, that timeout can extend to 100us.
+ */
+#define TIMEOUT		50
+
 struct msm_rng {
 	void __iomem *base;
 	struct clk *clk;
 	struct hwrng hwrng;
+	spinlock_t lock;
 };
 
 #define to_msm_rng(p)	container_of(p, struct msm_rng, hwrng)
@@ -96,11 +110,39 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
 
 	/* read random data from hardware */
 	do {
-		val = readl_relaxed(rng->base + PRNG_STATUS);
-		if (!(val & PRNG_STATUS_DATA_AVAIL))
-			break;
+		spin_lock(&rng->lock);
+
+		/*
+		 * First check the status bit.  If 'wait' is true, then wait
+		 * up to TIMEOUT microseconds for data to be available.
+		 */
+		if (wait) {
+			int ret;
+
+			ret = readl_poll_timeout_atomic(rng->base + PRNG_STATUS,
+				val, val & PRNG_STATUS_DATA_AVAIL, 0, TIMEOUT);
+			if (ret) {
+				/* Timed out */
+				spin_unlock(&rng->lock);
+				break;
+			}
+		} else {
+			val = readl_relaxed(rng->base + PRNG_STATUS);
+			if (!(val & PRNG_STATUS_DATA_AVAIL)) {
+				spin_unlock(&rng->lock);
+				break;
+			}
+		}
 
 		val = readl_relaxed(rng->base + PRNG_DATA_OUT);
+		spin_unlock(&rng->lock);
+
+		/*
+		 * Zero is technically a valid random number, but it's also
+		 * the value returned if the PRNG is not enabled properly.
+		 * To avoid accidentally returning all zeros, treat it as
+		 * invalid and just return what we've already read.
+		 */
 		if (!val)
 			break;
 
@@ -148,10 +190,11 @@ static int msm_rng_probe(struct platform_device *pdev)
 	if (IS_ERR(rng->clk))
 		return PTR_ERR(rng->clk);
 
-	rng->hwrng.name = KBUILD_MODNAME,
-	rng->hwrng.init = msm_rng_init,
-	rng->hwrng.cleanup = msm_rng_cleanup,
-	rng->hwrng.read = msm_rng_read,
+	rng->hwrng.name = KBUILD_MODNAME;
+	rng->hwrng.init = msm_rng_init;
+	rng->hwrng.cleanup = msm_rng_cleanup;
+	rng->hwrng.read = msm_rng_read;
+	spin_lock_init(&rng->lock);
 
 	ret = devm_hwrng_register(&pdev->dev, &rng->hwrng);
 	if (ret) {