diff mbox series

tpm, tpm_tis: Workaround failed command reception on Infineon devices

Message ID Z8lkSKOqBgt78pU2@earth.li (mailing list archive)
State New
Headers show
Series tpm, tpm_tis: Workaround failed command reception on Infineon devices | expand

Commit Message

Jonathan McDowell March 6, 2025, 9 a.m. UTC
From: Jonathan McDowell <noodles@meta.com>

Some Infineon devices have an issue where the status register will get
stuck with a quick REQUEST_USE / COMMAND_READY sequence. This is not
simply a matter of requiring a longer timeout; the workaround is to
retry the command submission. Add appropriate logic to do this in the
send path.

This is fixed in later firmware revisions, but those are not always
available, and cannot generally be easily updated from outside a
firmware environment.

Testing has been performed with a simple repeated loop of doing a
TPM2_CC_GET_CAPABILITY for TPM_CAP_PROP_MANUFACTURER using the Go code
at:

 https://the.earth.li/~noodles/tpm-stuff/timeout-reproducer-simple.go

It can take several hours to reproduce, and millions of operations.

Signed-off-by: Jonathan McDowell <noodles@meta.com>
---
 drivers/char/tpm/tpm_tis_core.c | 17 ++++++++++++++---
 drivers/char/tpm/tpm_tis_core.h |  1 +
 include/linux/tpm.h             |  1 +
 3 files changed, 16 insertions(+), 3 deletions(-)

Comments

Jarkko Sakkinen March 6, 2025, 10:23 p.m. UTC | #1
On Thu, Mar 06, 2025 at 09:00:56AM +0000, Jonathan McDowell wrote:
> From: Jonathan McDowell <noodles@meta.com>
> 
> Some Infineon devices have a issue where the status register will get
> stuck with a quick REQUEST_USE / COMMAND_READY sequence. This is not
> simply a matter of requiring a longer timeout; the work around is to
> retry the command submission. Add appropriate logic to do this in the
> send path.
> 
> This is fixed in later firmware revisions, but those are not always
> available, and cannot generally be easily updated from outside a
> firmware environment.
> 
> Testing has been performed with a simple repeated loop of doing a
> TPM2_CC_GET_CAPABILITY for TPM_CAP_PROP_MANUFACTURER using the Go code
> at:
> 
>  https://the.earth.li/~noodles/tpm-stuff/timeout-reproducer-simple.go
> 
> It can take several hours to reproduce, and millions of operations.
> 
> Signed-off-by: Jonathan McDowell <noodles@meta.com>
> ---
>  drivers/char/tpm/tpm_tis_core.c | 17 ++++++++++++++---
>  drivers/char/tpm/tpm_tis_core.h |  1 +
>  include/linux/tpm.h             |  1 +
>  3 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
> index 167d71747666..e4eae206a353 100644
> --- a/drivers/char/tpm/tpm_tis_core.c
> +++ b/drivers/char/tpm/tpm_tis_core.c
> @@ -464,7 +464,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
>  
>  		if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
>  					&priv->int_queue, false) < 0) {
> -			rc = -ETIME;
> +			if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> +				rc = -EAGAIN;
> +			else
> +				rc = -ETIME;
>  			goto out_err;
>  		}
>  		status = tpm_tis_status(chip);
> @@ -481,7 +484,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
>  
>  	if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
>  				&priv->int_queue, false) < 0) {
> -		rc = -ETIME;
> +		if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> +			rc = -EAGAIN;
> +		else
> +			rc = -ETIME;

I'd encapsulate this inside wait_for_tpm_stat().


>  		goto out_err;
>  	}
>  	status = tpm_tis_status(chip);
> @@ -546,9 +552,11 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
>  		if (rc >= 0)
>  			/* Data transfer done successfully */
>  			break;
> -		else if (rc != -EIO)
> +		else if (rc != EAGAIN && rc != -EIO)
>  			/* Data transfer failed, not recoverable */
>  			return rc;
> +
> +		usleep_range(priv->timeout_min, priv->timeout_max);
>  	}
>  
>  	/* go and do it */
> @@ -1144,6 +1152,9 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
>  		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
>  	}
>  
> +	if (priv->manufacturer_id == TPM_VID_IFX)
> +		set_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags);
> +
>  	if (is_bsw()) {
>  		priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
>  					ILB_REMAP_SIZE);
> diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
> index 690ad8e9b731..ce97b58dc005 100644
> --- a/drivers/char/tpm/tpm_tis_core.h
> +++ b/drivers/char/tpm/tpm_tis_core.h
> @@ -89,6 +89,7 @@ enum tpm_tis_flags {
>  	TPM_TIS_INVALID_STATUS		= 1,
>  	TPM_TIS_DEFAULT_CANCELLATION	= 2,
>  	TPM_TIS_IRQ_TESTED		= 3,
> +	TPM_TIS_STATUS_WORKAROUND	= 4,

TPM_TIS_TIMEOUT_AGAIN or maybe *_REPEAT? The current name does
not tell anything.

>  };
>  
>  struct tpm_tis_data {
> diff --git a/include/linux/tpm.h b/include/linux/tpm.h
> index 20a40ade8030..6c3125300c00 100644
> --- a/include/linux/tpm.h
> +++ b/include/linux/tpm.h
> @@ -335,6 +335,7 @@ enum tpm2_cc_attrs {
>  #define TPM_VID_WINBOND  0x1050
>  #define TPM_VID_STM      0x104A
>  #define TPM_VID_ATML     0x1114
> +#define TPM_VID_IFX      0x15D1
>  
>  enum tpm_chip_flags {
>  	TPM_CHIP_FLAG_BOOTSTRAPPED		= BIT(0),
> -- 
> 2.48.1
> 

BR, Jarkko
Jonathan McDowell March 7, 2025, 4:36 p.m. UTC | #2
On Fri, Mar 07, 2025 at 12:23:11AM +0200, Jarkko Sakkinen wrote:
> On Thu, Mar 06, 2025 at 09:00:56AM +0000, Jonathan McDowell wrote:
> > From: Jonathan McDowell <noodles@meta.com>
> > 
> > Some Infineon devices have a issue where the status register will get
> > stuck with a quick REQUEST_USE / COMMAND_READY sequence. This is not
> > simply a matter of requiring a longer timeout; the work around is to
> > retry the command submission. Add appropriate logic to do this in the
> > send path.
> > 
> > This is fixed in later firmware revisions, but those are not always
> > available, and cannot generally be easily updated from outside a
> > firmware environment.
> > 
> > Testing has been performed with a simple repeated loop of doing a
> > TPM2_CC_GET_CAPABILITY for TPM_CAP_PROP_MANUFACTURER using the Go code
> > at:
> > 
> >  https://the.earth.li/~noodles/tpm-stuff/timeout-reproducer-simple.go
> > 
> > It can take several hours to reproduce, and millions of operations.
> > 
> > Signed-off-by: Jonathan McDowell <noodles@meta.com>
> > ---
> >  drivers/char/tpm/tpm_tis_core.c | 17 ++++++++++++++---
> >  drivers/char/tpm/tpm_tis_core.h |  1 +
> >  include/linux/tpm.h             |  1 +
> >  3 files changed, 16 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
> > index 167d71747666..e4eae206a353 100644
> > --- a/drivers/char/tpm/tpm_tis_core.c
> > +++ b/drivers/char/tpm/tpm_tis_core.c
> > @@ -464,7 +464,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> >  
> >  		if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
> >  					&priv->int_queue, false) < 0) {
> > -			rc = -ETIME;
> > +			if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> > +				rc = -EAGAIN;
> > +			else
> > +				rc = -ETIME;
> >  			goto out_err;
> >  		}
> >  		status = tpm_tis_status(chip);
> > @@ -481,7 +484,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> >  
> >  	if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
> >  				&priv->int_queue, false) < 0) {
> > -		rc = -ETIME;
> > +		if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> > +			rc = -EAGAIN;
> > +		else
> > +			rc = -ETIME;
> 
> I'd encapsulate this inside wait_for_tpm_stat().

I think that gets a bit more complicated; this is an erratum in the send 
command path, for a stuck VALID bit, and the fix is to restart the whole 
command send (i.e. we need to kick the TPM with tpm_tis_ready() etc). 
I'm not sure returning -EAGAIN in wait_for_tpm_stat() then makes 
tpm_tis_send_data() any simpler.

> >  		goto out_err;
> >  	}
> >  	status = tpm_tis_status(chip);
> > @@ -546,9 +552,11 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
> >  		if (rc >= 0)
> >  			/* Data transfer done successfully */
> >  			break;
> > -		else if (rc != -EIO)
> > +		else if (rc != EAGAIN && rc != -EIO)
> >  			/* Data transfer failed, not recoverable */
> >  			return rc;
> > +
> > +		usleep_range(priv->timeout_min, priv->timeout_max);
> >  	}
> >  
> >  	/* go and do it */
> > @@ -1144,6 +1152,9 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
> >  		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
> >  	}
> >  
> > +	if (priv->manufacturer_id == TPM_VID_IFX)
> > +		set_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags);
> > +
> >  	if (is_bsw()) {
> >  		priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
> >  					ILB_REMAP_SIZE);
> > diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
> > index 690ad8e9b731..ce97b58dc005 100644
> > --- a/drivers/char/tpm/tpm_tis_core.h
> > +++ b/drivers/char/tpm/tpm_tis_core.h
> > @@ -89,6 +89,7 @@ enum tpm_tis_flags {
> >  	TPM_TIS_INVALID_STATUS		= 1,
> >  	TPM_TIS_DEFAULT_CANCELLATION	= 2,
> >  	TPM_TIS_IRQ_TESTED		= 3,
> > +	TPM_TIS_STATUS_WORKAROUND	= 4,
> 
> TPM_TIS_TIMEOUT_AGAIN or maybe *_REPEAT? The current name does
> not tell anything.

Yeah, TPM_TIS_STATUS_VALID_RETRY is perhaps clearer; it's not a timeout, 
and we're looking to do a retry based on STS_VALID.

> >  };
> >  
> >  struct tpm_tis_data {
> > diff --git a/include/linux/tpm.h b/include/linux/tpm.h
> > index 20a40ade8030..6c3125300c00 100644
> > --- a/include/linux/tpm.h
> > +++ b/include/linux/tpm.h
> > @@ -335,6 +335,7 @@ enum tpm2_cc_attrs {
> >  #define TPM_VID_WINBOND  0x1050
> >  #define TPM_VID_STM      0x104A
> >  #define TPM_VID_ATML     0x1114
> > +#define TPM_VID_IFX      0x15D1
> >  
> >  enum tpm_chip_flags {
> >  	TPM_CHIP_FLAG_BOOTSTRAPPED		= BIT(0),

J.
Jarkko Sakkinen March 7, 2025, 4:45 p.m. UTC | #3
On Fri, Mar 07, 2025 at 04:36:12PM +0000, Jonathan McDowell wrote:
> On Fri, Mar 07, 2025 at 12:23:11AM +0200, Jarkko Sakkinen wrote:
> > On Thu, Mar 06, 2025 at 09:00:56AM +0000, Jonathan McDowell wrote:
> > > From: Jonathan McDowell <noodles@meta.com>
> > > 
> > > Some Infineon devices have a issue where the status register will get
> > > stuck with a quick REQUEST_USE / COMMAND_READY sequence. This is not
> > > simply a matter of requiring a longer timeout; the work around is to
> > > retry the command submission. Add appropriate logic to do this in the
> > > send path.
> > > 
> > > This is fixed in later firmware revisions, but those are not always
> > > available, and cannot generally be easily updated from outside a
> > > firmware environment.
> > > 
> > > Testing has been performed with a simple repeated loop of doing a
> > > TPM2_CC_GET_CAPABILITY for TPM_CAP_PROP_MANUFACTURER using the Go code
> > > at:
> > > 
> > >  https://the.earth.li/~noodles/tpm-stuff/timeout-reproducer-simple.go
> > > 
> > > It can take several hours to reproduce, and millions of operations.
> > > 
> > > Signed-off-by: Jonathan McDowell <noodles@meta.com>
> > > ---
> > >  drivers/char/tpm/tpm_tis_core.c | 17 ++++++++++++++---
> > >  drivers/char/tpm/tpm_tis_core.h |  1 +
> > >  include/linux/tpm.h             |  1 +
> > >  3 files changed, 16 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
> > > index 167d71747666..e4eae206a353 100644
> > > --- a/drivers/char/tpm/tpm_tis_core.c
> > > +++ b/drivers/char/tpm/tpm_tis_core.c
> > > @@ -464,7 +464,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> > >  
> > >  		if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
> > >  					&priv->int_queue, false) < 0) {
> > > -			rc = -ETIME;
> > > +			if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> > > +				rc = -EAGAIN;
> > > +			else
> > > +				rc = -ETIME;
> > >  			goto out_err;
> > >  		}
> > >  		status = tpm_tis_status(chip);
> > > @@ -481,7 +484,10 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> > >  
> > >  	if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
> > >  				&priv->int_queue, false) < 0) {
> > > -		rc = -ETIME;
> > > +		if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
> > > +			rc = -EAGAIN;
> > > +		else
> > > +			rc = -ETIME;
> > 
> > I'd encapsulate this inside wait_for_tpm_stat().
> 
> I think that gets a bit more complicated; this is an errata in the send 
> command path, for a stuck VALID bit, and the fix is to restart the whole 
> command send (i.e. we need to kick the TPM with tpm_tis_ready() etc). 
> I'm not sure returning EAGAIN in wait_for_tpm_stat() then makes 
> tpm_tis_send_data() any simpler.

OK, it is a fair argument. Let's keep it as it is.

> 
> > >  		goto out_err;
> > >  	}
> > >  	status = tpm_tis_status(chip);
> > > @@ -546,9 +552,11 @@ static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
> > >  		if (rc >= 0)
> > >  			/* Data transfer done successfully */
> > >  			break;
> > > -		else if (rc != -EIO)
> > > +		else if (rc != EAGAIN && rc != -EIO)
> > >  			/* Data transfer failed, not recoverable */
> > >  			return rc;
> > > +
> > > +		usleep_range(priv->timeout_min, priv->timeout_max);
> > >  	}
> > >  
> > >  	/* go and do it */
> > > @@ -1144,6 +1152,9 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
> > >  		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
> > >  	}
> > >  
> > > +	if (priv->manufacturer_id == TPM_VID_IFX)
> > > +		set_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags);
> > > +
> > >  	if (is_bsw()) {
> > >  		priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
> > >  					ILB_REMAP_SIZE);
> > > diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
> > > index 690ad8e9b731..ce97b58dc005 100644
> > > --- a/drivers/char/tpm/tpm_tis_core.h
> > > +++ b/drivers/char/tpm/tpm_tis_core.h
> > > @@ -89,6 +89,7 @@ enum tpm_tis_flags {
> > >  	TPM_TIS_INVALID_STATUS		= 1,
> > >  	TPM_TIS_DEFAULT_CANCELLATION	= 2,
> > >  	TPM_TIS_IRQ_TESTED		= 3,
> > > +	TPM_TIS_STATUS_WORKAROUND	= 4,
> > 
> > TPM_TIS_TIMEOUT_AGAIN or maybe *_REPEAT? The current name does
> > not tell anything.
> 
> Yeah, TPM_TIS_STATUS_VALID_RETRY is perhaps clearer; it's not a timeout, 
> and we're looking to do a retry based on STS_VALID.

WFM


> 
> > >  };
> > >  
> > >  struct tpm_tis_data {
> > > diff --git a/include/linux/tpm.h b/include/linux/tpm.h
> > > index 20a40ade8030..6c3125300c00 100644
> > > --- a/include/linux/tpm.h
> > > +++ b/include/linux/tpm.h
> > > @@ -335,6 +335,7 @@ enum tpm2_cc_attrs {
> > >  #define TPM_VID_WINBOND  0x1050
> > >  #define TPM_VID_STM      0x104A
> > >  #define TPM_VID_ATML     0x1114
> > > +#define TPM_VID_IFX      0x15D1
> > >  
> > >  enum tpm_chip_flags {
> > >  	TPM_CHIP_FLAG_BOOTSTRAPPED		= BIT(0),
> 
> J.
> 
> -- 
> ... "What's the philosophical difference between a killfile and the
>     automoderation?" "A killfile throws away good posts.  Automoderation
>     throws away bad posts." -- Jonathan H N Chin to Calle Dybedahl

BR, Jarkko
diff mbox series

Patch

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 167d71747666..e4eae206a353 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -464,7 +464,10 @@  static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
 
 		if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
 					&priv->int_queue, false) < 0) {
-			rc = -ETIME;
+			if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
+				rc = -EAGAIN;
+			else
+				rc = -ETIME;
 			goto out_err;
 		}
 		status = tpm_tis_status(chip);
@@ -481,7 +484,10 @@  static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
 
 	if (wait_for_tpm_stat(chip, TPM_STS_VALID, chip->timeout_c,
 				&priv->int_queue, false) < 0) {
-		rc = -ETIME;
+		if (test_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags))
+			rc = -EAGAIN;
+		else
+			rc = -ETIME;
 		goto out_err;
 	}
 	status = tpm_tis_status(chip);
@@ -546,9 +552,11 @@  static int tpm_tis_send_main(struct tpm_chip *chip, const u8 *buf, size_t len)
 		if (rc >= 0)
 			/* Data transfer done successfully */
 			break;
-		else if (rc != -EIO)
+		else if (rc != -EAGAIN && rc != -EIO)
 			/* Data transfer failed, not recoverable */
 			return rc;
+
+		usleep_range(priv->timeout_min, priv->timeout_max);
 	}
 
 	/* go and do it */
@@ -1144,6 +1152,9 @@  int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
 	}
 
+	if (priv->manufacturer_id == TPM_VID_IFX)
+		set_bit(TPM_TIS_STATUS_WORKAROUND, &priv->flags);
+
 	if (is_bsw()) {
 		priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
 					ILB_REMAP_SIZE);
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 690ad8e9b731..ce97b58dc005 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -89,6 +89,7 @@  enum tpm_tis_flags {
 	TPM_TIS_INVALID_STATUS		= 1,
 	TPM_TIS_DEFAULT_CANCELLATION	= 2,
 	TPM_TIS_IRQ_TESTED		= 3,
+	TPM_TIS_STATUS_WORKAROUND	= 4,
 };
 
 struct tpm_tis_data {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 20a40ade8030..6c3125300c00 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -335,6 +335,7 @@  enum tpm2_cc_attrs {
 #define TPM_VID_WINBOND  0x1050
 #define TPM_VID_STM      0x104A
 #define TPM_VID_ATML     0x1114
+#define TPM_VID_IFX      0x15D1
 
 enum tpm_chip_flags {
 	TPM_CHIP_FLAG_BOOTSTRAPPED		= BIT(0),