diff mbox series

[v2,2/2] iio: pressure: dps310: Reset chip if MEAS_CFG is corrupt

Message ID 20220518144818.12957-3-eajames@linux.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series iio: pressure: dps310: Reset chip if MEAS_CFG is corrupt | expand

Commit Message

Eddie James May 18, 2022, 2:48 p.m. UTC
Corruption of the MEAS_CFG register has been observed soon after
system boot. In order to recover this scenario, check MEAS_CFG if
measurement isn't ready, and if it's incorrect, reset the DPS310
and execute the startup procedure.

Fixes: ba6ec48e76bc ("iio: Add driver for Infineon DPS310")
Signed-off-by: Eddie James <eajames@linux.ibm.com>
---
 drivers/iio/pressure/dps310.c | 89 ++++++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 18 deletions(-)

Comments

Joel Stanley May 24, 2022, 2:12 a.m. UTC | #1
On Wed, 18 May 2022 at 14:48, Eddie James <eajames@linux.ibm.com> wrote:
>
> Corruption of the MEAS_CFG register has been observed soon after
> system boot. In order to recover this scenario, check MEAS_CFG if
> measurement isn't ready, and if it's incorrect, reset the DPS310
> and execute the startup procedure.

I have some suggestions below on how to rework to make the code easier
to understand. But before we got to that, I had some high level
questions:


You don't seem to be setting the en bits in the CFG register after
doing the reset. Is that required?

Are we ok to sleep for 2.5ms in the iio_info->read_raw callback?


>
> Fixes: ba6ec48e76bc ("iio: Add driver for Infineon DPS310")
> Signed-off-by: Eddie James <eajames@linux.ibm.com>
> ---
>  drivers/iio/pressure/dps310.c | 89 ++++++++++++++++++++++++++++-------
>  1 file changed, 71 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
> index f79b274bb38d..c6d02679ef33 100644
> --- a/drivers/iio/pressure/dps310.c
> +++ b/drivers/iio/pressure/dps310.c
> @@ -397,6 +397,39 @@ static int dps310_get_temp_k(struct dps310_data *data)
>         return scale_factors[ilog2(rc)];
>  }
>
> +/* Called with lock held */

Perhaps add this to your comment: Returns a negative value on error, a
positive value when the device is not ready (and may have been reset
due to corruption), and zero when the device is ready.

> +static int dps310_check_reset_meas_cfg(struct dps310_data *data, int ready_bit)
> +{
> +       int en = DPS310_PRS_EN | DPS310_TEMP_EN | DPS310_BACKGROUND;
> +       int meas_cfg;
> +       int rc = regmap_read(data->regmap, DPS310_MEAS_CFG, &meas_cfg);
> +
> +       if (rc < 0)
> +               return rc;
> +
> +       if (meas_cfg & ready_bit)
> +               return 0;
> +
> +       if ((meas_cfg & en) != en) {
> +               /* DPS310 register state corrupt, better start from scratch */
> +               rc = regmap_write(data->regmap, DPS310_RESET,
> +                                 DPS310_RESET_MAGIC);
> +               if (rc < 0)
> +                       return rc;
> +
> +               /* Wait for device chip access: 2.5ms in specification */
> +               usleep_range(2500, 12000);
> +               rc = dps310_startup(data);
> +               if (rc)
> +                       return rc;
> +
> +               dev_info(&data->client->dev,
> +                        "recovered from corrupted MEAS_CFG=%02x\n", meas_cfg);
> +       }
> +
> +       return 1;

I'm confused about this case. We get there when the device doesn't
have ready_bit set in meas_cfg and we've done a reset, but we also get
here when the bit isn't set and we haven't done anything to resolve it
(after re-reading the code I understand now, but perhaps reworking it
as follows will make it clear):

Could we write it like this:

if (meas_cfg & ready_bit) {
  /* Device ready, must be okay */
  return 0;
}

 if (meas_cfg & en) {
   /* Device okay (but not ready), no action required */
   return 1;
}

  /* DPS310 register state corrupt, better start from scratch */
...
 return 1;


> +}
> +
>  static int dps310_read_pres_raw(struct dps310_data *data)
>  {
>         int rc;
> @@ -409,15 +442,25 @@ static int dps310_read_pres_raw(struct dps310_data *data)
>         if (mutex_lock_interruptible(&data->lock))
>                 return -EINTR;
>
> -       rate = dps310_get_pres_samp_freq(data);
> -       timeout = DPS310_POLL_TIMEOUT_US(rate);
> -
> -       /* Poll for sensor readiness; base the timeout upon the sample rate. */
> -       rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> -                                     ready & DPS310_PRS_RDY,
> -                                     DPS310_POLL_SLEEP_US(timeout), timeout);
> -       if (rc)
> -               goto done;
> +       rc = dps310_check_reset_meas_cfg(data, DPS310_PRS_RDY);

can we do this:

 if (rc < 0)
   goto done;

if (rc > 0) {

}

The rework I suggest makes it clearer that we've considered the '0'
case, when the device is ready before this code runs.

> +       if (rc) {
> +               if (rc < 0)
> +                       goto done;
> +
> +               rate = dps310_get_pres_samp_freq(data);
> +               timeout = DPS310_POLL_TIMEOUT_US(rate);
> +
> +               /*
> +                * Poll for sensor readiness; base the timeout upon the sample
> +                * rate.
> +                */
> +               rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
> +                                             ready, ready & DPS310_PRS_RDY,
> +                                             DPS310_POLL_SLEEP_US(timeout),
> +                                             timeout);
> +               if (rc)
> +                       goto done;
> +       }
>
>         rc = regmap_bulk_read(data->regmap, DPS310_PRS_BASE, val, sizeof(val));
>         if (rc < 0)
> @@ -458,15 +501,25 @@ static int dps310_read_temp_raw(struct dps310_data *data)
>         if (mutex_lock_interruptible(&data->lock))
>                 return -EINTR;
>
> -       rate = dps310_get_temp_samp_freq(data);
> -       timeout = DPS310_POLL_TIMEOUT_US(rate);
> -
> -       /* Poll for sensor readiness; base the timeout upon the sample rate. */
> -       rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
> -                                     ready & DPS310_TMP_RDY,
> -                                     DPS310_POLL_SLEEP_US(timeout), timeout);
> -       if (rc < 0)
> -               goto done;
> +       rc = dps310_check_reset_meas_cfg(data, DPS310_TMP_RDY);
> +       if (rc) {
> +               if (rc < 0)
> +                       goto done;
> +
> +               rate = dps310_get_temp_samp_freq(data);
> +               timeout = DPS310_POLL_TIMEOUT_US(rate);
> +
> +               /*
> +                * Poll for sensor readiness; base the timeout upon the sample
> +                * rate.
> +                */
> +               rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
> +                                             ready, ready & DPS310_TMP_RDY,
> +                                             DPS310_POLL_SLEEP_US(timeout),
> +                                             timeout);
> +               if (rc < 0)
> +                       goto done;
> +       }
>
>         rc = dps310_read_temp_ready(data);
>
> --
> 2.27.0
>
Eddie James May 24, 2022, 2:18 p.m. UTC | #2
On 5/23/22 21:12, Joel Stanley wrote:
> On Wed, 18 May 2022 at 14:48, Eddie James <eajames@linux.ibm.com> wrote:
>> Corruption of the MEAS_CFG register has been observed soon after
>> system boot. In order to recover this scenario, check MEAS_CFG if
>> measurement isn't ready, and if it's incorrect, reset the DPS310
>> and execute the startup procedure.
> I have some suggestions below on how to rework to make the code easier
> to understand. But before we got to that, I had some high level
> questions:
>
>
> You don't seem to be setting the en bits in the CFG register after
> doing the reset. Is that required?


It does set the enable bits in the startup procedure, called after the 
reset.


>
> Are we ok to sleep for 2.5ms in the iio_info->read_raw callback?


I believe it's safe... the code already has a mutex, so its not called 
in atomic context.


>
>
>> Fixes: ba6ec48e76bc ("iio: Add driver for Infineon DPS310")
>> Signed-off-by: Eddie James <eajames@linux.ibm.com>
>> ---
>>   drivers/iio/pressure/dps310.c | 89 ++++++++++++++++++++++++++++-------
>>   1 file changed, 71 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
>> index f79b274bb38d..c6d02679ef33 100644
>> --- a/drivers/iio/pressure/dps310.c
>> +++ b/drivers/iio/pressure/dps310.c
>> @@ -397,6 +397,39 @@ static int dps310_get_temp_k(struct dps310_data *data)
>>          return scale_factors[ilog2(rc)];
>>   }
>>
>> +/* Called with lock held */
> Perhaps add this to your comment: Returns a negative value on error, a
> positive value when the device is not ready (and may have been reset
> due to corruption), and zero when the device is ready.


Good idea.


>
>> +static int dps310_check_reset_meas_cfg(struct dps310_data *data, int ready_bit)
>> +{
>> +       int en = DPS310_PRS_EN | DPS310_TEMP_EN | DPS310_BACKGROUND;
>> +       int meas_cfg;
>> +       int rc = regmap_read(data->regmap, DPS310_MEAS_CFG, &meas_cfg);
>> +
>> +       if (rc < 0)
>> +               return rc;
>> +
>> +       if (meas_cfg & ready_bit)
>> +               return 0;
>> +
>> +       if ((meas_cfg & en) != en) {
>> +               /* DPS310 register state corrupt, better start from scratch */
>> +               rc = regmap_write(data->regmap, DPS310_RESET,
>> +                                 DPS310_RESET_MAGIC);
>> +               if (rc < 0)
>> +                       return rc;
>> +
>> +               /* Wait for device chip access: 2.5ms in specification */
>> +               usleep_range(2500, 12000);
>> +               rc = dps310_startup(data);
>> +               if (rc)
>> +                       return rc;
>> +
>> +               dev_info(&data->client->dev,
>> +                        "recovered from corrupted MEAS_CFG=%02x\n", meas_cfg);
>> +       }
>> +
>> +       return 1;
> I'm confused about this case. We get there when the device doesn't
> have ready_bit set in meas_cfg and we've done a reset, but we also get
> here when the bit isn't set and we haven't done anything to resolve it
> (after re-reading the code I understand now, but perhaps reworking it
> as follows will make it clear):
>
> Could we write it like this:
>
> if (meas_cfg & ready_bit) {
>    /* Device ready, must be okay */
>    return 0;
> }
>
>   if (meas_cfg & en) {
>     /* Device okay (but not ready), no action required */
>     return 1;
> }
>
>    /* DPS310 register state corrupt, better start from scratch */
> ...
>   return 1;


Yea it could be clearer, I can update that.


>
>
>> +}
>> +
>>   static int dps310_read_pres_raw(struct dps310_data *data)
>>   {
>>          int rc;
>> @@ -409,15 +442,25 @@ static int dps310_read_pres_raw(struct dps310_data *data)
>>          if (mutex_lock_interruptible(&data->lock))
>>                  return -EINTR;
>>
>> -       rate = dps310_get_pres_samp_freq(data);
>> -       timeout = DPS310_POLL_TIMEOUT_US(rate);
>> -
>> -       /* Poll for sensor readiness; base the timeout upon the sample rate. */
>> -       rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
>> -                                     ready & DPS310_PRS_RDY,
>> -                                     DPS310_POLL_SLEEP_US(timeout), timeout);
>> -       if (rc)
>> -               goto done;
>> +       rc = dps310_check_reset_meas_cfg(data, DPS310_PRS_RDY);
> can we do this:
>
>   if (rc < 0)
>     goto done;
>
> if (rc > 0) {
>
> }
>
> The rework I suggest makes it clearer that we've considered the '0'
> case, when the device is ready before this code runs.


Sure. Thanks for the review, I'll get a v3 up.


Thanks,

Eddie


>
>> +       if (rc) {
>> +               if (rc < 0)
>> +                       goto done;
>> +
>> +               rate = dps310_get_pres_samp_freq(data);
>> +               timeout = DPS310_POLL_TIMEOUT_US(rate);
>> +
>> +               /*
>> +                * Poll for sensor readiness; base the timeout upon the sample
>> +                * rate.
>> +                */
>> +               rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
>> +                                             ready, ready & DPS310_PRS_RDY,
>> +                                             DPS310_POLL_SLEEP_US(timeout),
>> +                                             timeout);
>> +               if (rc)
>> +                       goto done;
>> +       }
>>
>>          rc = regmap_bulk_read(data->regmap, DPS310_PRS_BASE, val, sizeof(val));
>>          if (rc < 0)
>> @@ -458,15 +501,25 @@ static int dps310_read_temp_raw(struct dps310_data *data)
>>          if (mutex_lock_interruptible(&data->lock))
>>                  return -EINTR;
>>
>> -       rate = dps310_get_temp_samp_freq(data);
>> -       timeout = DPS310_POLL_TIMEOUT_US(rate);
>> -
>> -       /* Poll for sensor readiness; base the timeout upon the sample rate. */
>> -       rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
>> -                                     ready & DPS310_TMP_RDY,
>> -                                     DPS310_POLL_SLEEP_US(timeout), timeout);
>> -       if (rc < 0)
>> -               goto done;
>> +       rc = dps310_check_reset_meas_cfg(data, DPS310_TMP_RDY);
>> +       if (rc) {
>> +               if (rc < 0)
>> +                       goto done;
>> +
>> +               rate = dps310_get_temp_samp_freq(data);
>> +               timeout = DPS310_POLL_TIMEOUT_US(rate);
>> +
>> +               /*
>> +                * Poll for sensor readiness; base the timeout upon the sample
>> +                * rate.
>> +                */
>> +               rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
>> +                                             ready, ready & DPS310_TMP_RDY,
>> +                                             DPS310_POLL_SLEEP_US(timeout),
>> +                                             timeout);
>> +               if (rc < 0)
>> +                       goto done;
>> +       }
>>
>>          rc = dps310_read_temp_ready(data);
>>
>> --
>> 2.27.0
>>
diff mbox series

Patch

diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
index f79b274bb38d..c6d02679ef33 100644
--- a/drivers/iio/pressure/dps310.c
+++ b/drivers/iio/pressure/dps310.c
@@ -397,6 +397,39 @@  static int dps310_get_temp_k(struct dps310_data *data)
 	return scale_factors[ilog2(rc)];
 }
 
+/* Called with lock held */
+static int dps310_check_reset_meas_cfg(struct dps310_data *data, int ready_bit)
+{
+	int en = DPS310_PRS_EN | DPS310_TEMP_EN | DPS310_BACKGROUND;
+	int meas_cfg;
+	int rc = regmap_read(data->regmap, DPS310_MEAS_CFG, &meas_cfg);
+
+	if (rc < 0)
+		return rc;
+
+	if (meas_cfg & ready_bit)
+		return 0;
+
+	if ((meas_cfg & en) != en) {
+		/* DPS310 register state corrupt, better start from scratch */
+		rc = regmap_write(data->regmap, DPS310_RESET,
+				  DPS310_RESET_MAGIC);
+		if (rc < 0)
+			return rc;
+
+		/* Wait for device chip access: 2.5ms in specification */
+		usleep_range(2500, 12000);
+		rc = dps310_startup(data);
+		if (rc)
+			return rc;
+
+		dev_info(&data->client->dev,
+			 "recovered from corrupted MEAS_CFG=%02x\n", meas_cfg);
+	}
+
+	return 1;
+}
+
 static int dps310_read_pres_raw(struct dps310_data *data)
 {
 	int rc;
@@ -409,15 +442,25 @@  static int dps310_read_pres_raw(struct dps310_data *data)
 	if (mutex_lock_interruptible(&data->lock))
 		return -EINTR;
 
-	rate = dps310_get_pres_samp_freq(data);
-	timeout = DPS310_POLL_TIMEOUT_US(rate);
-
-	/* Poll for sensor readiness; base the timeout upon the sample rate. */
-	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
-				      ready & DPS310_PRS_RDY,
-				      DPS310_POLL_SLEEP_US(timeout), timeout);
-	if (rc)
-		goto done;
+	rc = dps310_check_reset_meas_cfg(data, DPS310_PRS_RDY);
+	if (rc) {
+		if (rc < 0)
+			goto done;
+
+		rate = dps310_get_pres_samp_freq(data);
+		timeout = DPS310_POLL_TIMEOUT_US(rate);
+
+		/*
+		 * Poll for sensor readiness; base the timeout upon the sample
+		 * rate.
+		 */
+		rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
+					      ready, ready & DPS310_PRS_RDY,
+					      DPS310_POLL_SLEEP_US(timeout),
+					      timeout);
+		if (rc)
+			goto done;
+	}
 
 	rc = regmap_bulk_read(data->regmap, DPS310_PRS_BASE, val, sizeof(val));
 	if (rc < 0)
@@ -458,15 +501,25 @@  static int dps310_read_temp_raw(struct dps310_data *data)
 	if (mutex_lock_interruptible(&data->lock))
 		return -EINTR;
 
-	rate = dps310_get_temp_samp_freq(data);
-	timeout = DPS310_POLL_TIMEOUT_US(rate);
-
-	/* Poll for sensor readiness; base the timeout upon the sample rate. */
-	rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG, ready,
-				      ready & DPS310_TMP_RDY,
-				      DPS310_POLL_SLEEP_US(timeout), timeout);
-	if (rc < 0)
-		goto done;
+	rc = dps310_check_reset_meas_cfg(data, DPS310_TMP_RDY);
+	if (rc) {
+		if (rc < 0)
+			goto done;
+
+		rate = dps310_get_temp_samp_freq(data);
+		timeout = DPS310_POLL_TIMEOUT_US(rate);
+
+		/*
+		 * Poll for sensor readiness; base the timeout upon the sample
+		 * rate.
+		 */
+		rc = regmap_read_poll_timeout(data->regmap, DPS310_MEAS_CFG,
+					      ready, ready & DPS310_TMP_RDY,
+					      DPS310_POLL_SLEEP_US(timeout),
+					      timeout);
+		if (rc < 0)
+			goto done;
+	}
 
 	rc = dps310_read_temp_ready(data);