diff mbox series

[crypto-2.6] crypto: ccp: add timeout support in the SEV command

Message ID 1534367485-4386-2-git-send-email-brijesh.singh@amd.com (mailing list archive)
State Accepted
Delegated to: Herbert Xu
Headers show
Series [crypto-2.6] crypto: ccp: add timeout support in the SEV command | expand

Commit Message

Brijesh Singh Aug. 15, 2018, 9:11 p.m. UTC
Currently, the CCP driver assumes that the SEV command issued to the PSP
will always return (i.e. it will never hang).  But recently, firmware bugs
have shown that a command can hang.  Since of the SEV commands are used
in probe routines, this can cause boot hangs and/or loss of virtualization
capabilities.

To protect against firmware bugs, add a timeout in the SEV command
execution flow.  If a command does not complete within the specified
timeout then return -ETIMEOUT and stop the driver from executing any
further commands since the state of the SEV firmware is unknown.

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Gary Hook <Gary.Hook@amd.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
---
 drivers/crypto/ccp/psp-dev.c | 46 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

Comments

Herbert Xu Sept. 4, 2018, 5:19 a.m. UTC | #1
On Wed, Aug 15, 2018 at 04:11:25PM -0500, Brijesh Singh wrote:
> Currently, the CCP driver assumes that the SEV command issued to the PSP
> will always return (i.e. it will never hang).  But recently, firmware bugs
> have shown that a command can hang.  Since of the SEV commands are used
> in probe routines, this can cause boot hangs and/or loss of virtualization
> capabilities.
> 
> To protect against firmware bugs, add a timeout in the SEV command
> execution flow.  If a command does not complete within the specified
> timeout then return -ETIMEOUT and stop the driver from executing any
> further commands since the state of the SEV firmware is unknown.
> 
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gary Hook <Gary.Hook@amd.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  drivers/crypto/ccp/psp-dev.c | 46 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 41 insertions(+), 5 deletions(-)

Patch applied.  Thanks.
Borislav Petkov Sept. 4, 2018, 8:11 a.m. UTC | #2
On Wed, Aug 15, 2018 at 04:11:25PM -0500, Brijesh Singh wrote:
> Currently, the CCP driver assumes that the SEV command issued to the PSP
> will always return (i.e. it will never hang).  But recently, firmware bugs
> have shown that a command can hang.  Since of the SEV commands are used
> in probe routines, this can cause boot hangs and/or loss of virtualization
> capabilities.
> 
> To protect against firmware bugs, add a timeout in the SEV command
> execution flow.  If a command does not complete within the specified
> timeout then return -ETIMEOUT and stop the driver from executing any
> further commands since the state of the SEV firmware is unknown.
> 
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Gary Hook <Gary.Hook@amd.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> ---
>  drivers/crypto/ccp/psp-dev.c | 46 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 41 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
> index 218739b..72790d8 100644
> --- a/drivers/crypto/ccp/psp-dev.c
> +++ b/drivers/crypto/ccp/psp-dev.c
> @@ -38,6 +38,17 @@ static DEFINE_MUTEX(sev_cmd_mutex);
>  static struct sev_misc_dev *misc_dev;
>  static struct psp_device *psp_master;
>  
> +static int psp_cmd_timeout = 100;
> +module_param(psp_cmd_timeout, int, 0644);
> +MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands");
> +
> +static int psp_probe_timeout = 5;
> +module_param(psp_probe_timeout, int, 0644);
> +MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");

Just a question: what prevents the user from supplying non-sensical
values here?

I think we should clamp them to only allowed values because I don't want
to be debugging some strange bugs due to that.

> +
> +static bool psp_dead;
> +static int psp_timeout;
> +
>  static struct psp_device *psp_alloc_struct(struct sp_device *sp)
>  {
>  	struct device *dev = sp->dev;
> @@ -82,10 +93,19 @@ static irqreturn_t psp_irq_handler(int irq, void *data)
>  	return IRQ_HANDLED;
>  }
>  
> -static void sev_wait_cmd_ioc(struct psp_device *psp, unsigned int *reg)
> +static int sev_wait_cmd_ioc(struct psp_device *psp,
> +			    unsigned int *reg, unsigned int timeout)
>  {
> -	wait_event(psp->sev_int_queue, psp->sev_int_rcvd);
> +	int ret;
> +
> +	ret = wait_event_timeout(psp->sev_int_queue,
> +			psp->sev_int_rcvd, timeout * HZ);

Align args at opening brace.

> +	if (!ret)
> +		return -ETIMEDOUT;
> +
>  	*reg = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
> +
> +	return 0;
>  }
>  
>  static int sev_cmd_buffer_len(int cmd)
> @@ -133,12 +153,15 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
>  	if (!psp)
>  		return -ENODEV;
>  
> +	if (psp_dead)
> +		return -EBUSY;
> +
>  	/* Get the physical address of the command buffer */
>  	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
>  	phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
>  
> -	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x\n",
> -		cmd, phys_msb, phys_lsb);
> +	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
> +		cmd, phys_msb, phys_lsb, psp_timeout);
>  
>  	print_hex_dump_debug("(in):  ", DUMP_PREFIX_OFFSET, 16, 2, data,
>  			     sev_cmd_buffer_len(cmd), false);
> @@ -154,7 +177,18 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
>  	iowrite32(reg, psp->io_regs + psp->vdata->cmdresp_reg);
>  
>  	/* wait for command completion */
> -	sev_wait_cmd_ioc(psp, &reg);
> +	ret = sev_wait_cmd_ioc(psp, &reg, psp_timeout);
> +	if (ret) {
> +		if (psp_ret)
> +			*psp_ret = 0;
> +
> +		dev_err(psp->dev, "sev command %#x timed out, disabling PSP \n", cmd);
									   ^

Trailing space.
Brijesh Singh Sept. 10, 2018, 7:06 p.m. UTC | #3
Hi Boris,


On 09/04/2018 03:11 AM, Borislav Petkov wrote:
...

>> +
>> +static int psp_probe_timeout = 5;
>> +module_param(psp_probe_timeout, int, 0644);
>> +MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
> 
> Just a question: what prevents the user from supplying non-sensical
> values here?
> 
> I think we should clamp them to only allowed values because I don't want
> to be debugging some strange bugs due to that.
> 

Nothing prevent user from supplying a bogus number. The main question
is, clamp with what number ?

IMO, if user is overriding the default timeout number then its possible
that user is dealing with a buggy firmware which does not work with
default timeout and silently clamping the value will not help them.


- Brijesh
Borislav Petkov Sept. 11, 2018, 1:53 p.m. UTC | #4
On Mon, Sep 10, 2018 at 02:06:57PM -0500, Brijesh Singh wrote:
> Nothing prevent user from supplying a bogus number. The main question
> is, clamp with what number ?

So you definitely want to forbid too large timeouts - that wouldn't make
any sense anyway. And too small either, because a too small timeout
would make a potentially functioning fw broken.

> IMO, if user is overriding the default timeout number then its possible
> that user is dealing with a buggy firmware which does not work with
> default timeout and silently clamping the value will not help them.

No one said "silently" - you simply say:

	"Correcting PSP	"Correcting PSP probe timeout to X seconds."

when loading the driver so that the user is aware that the value she
entered might not be an optimal one.
Brijesh Singh Sept. 12, 2018, 12:11 a.m. UTC | #5
On 9/4/18 12:19 AM, Herbert Xu wrote:
> On Wed, Aug 15, 2018 at 04:11:25PM -0500, Brijesh Singh wrote:
>> Currently, the CCP driver assumes that the SEV command issued to the PSP
>> will always return (i.e. it will never hang).  But recently, firmware bugs
>> have shown that a command can hang.  Since of the SEV commands are used
>> in probe routines, this can cause boot hangs and/or loss of virtualization
>> capabilities.
>>
>> To protect against firmware bugs, add a timeout in the SEV command
>> execution flow.  If a command does not complete within the specified
>> timeout then return -ETIMEOUT and stop the driver from executing any
>> further commands since the state of the SEV firmware is unknown.
>>
>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
>> Cc: Gary Hook <Gary.Hook@amd.com>
>> Cc: Herbert Xu <herbert@gondor.apana.org.au>
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
>> ---
>>  drivers/crypto/ccp/psp-dev.c | 46 +++++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 41 insertions(+), 5 deletions(-)
> Patch applied.  Thanks.
Thanks Herbert.

Can you please include this patch in your next 4.19-rcX pull request?
Multiple folks are encountering this firmware bug on Ryzen systems. I
will submit the modified version of this patch for stable release (s) so
that we fix issues in 4.18,.17 and .16. thank you.

-Brijesh
Herbert Xu Sept. 13, 2018, 5:02 a.m. UTC | #6
On Tue, Sep 11, 2018 at 07:11:05PM -0500, Brijesh Singh wrote:
>
> Can you please include this patch in your next 4.19-rcX pull request?
> Multiple folks are encountering this firmware bug on Ryzen systems. I
> will submit the modified version of this patch for stable release (s) so
> that we fix issues in 4.18,.17 and .16. thank you.

OK, I will add it to the crypto tree.

Cheers,
diff mbox series

Patch

diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 218739b..72790d8 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -38,6 +38,17 @@  static DEFINE_MUTEX(sev_cmd_mutex);
 static struct sev_misc_dev *misc_dev;
 static struct psp_device *psp_master;
 
+static int psp_cmd_timeout = 100;
+module_param(psp_cmd_timeout, int, 0644);
+MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands");
+
+static int psp_probe_timeout = 5;
+module_param(psp_probe_timeout, int, 0644);
+MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
+
+static bool psp_dead;
+static int psp_timeout;
+
 static struct psp_device *psp_alloc_struct(struct sp_device *sp)
 {
 	struct device *dev = sp->dev;
@@ -82,10 +93,19 @@  static irqreturn_t psp_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void sev_wait_cmd_ioc(struct psp_device *psp, unsigned int *reg)
+static int sev_wait_cmd_ioc(struct psp_device *psp,
+			    unsigned int *reg, unsigned int timeout)
 {
-	wait_event(psp->sev_int_queue, psp->sev_int_rcvd);
+	int ret;
+
+	ret = wait_event_timeout(psp->sev_int_queue,
+			psp->sev_int_rcvd, timeout * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
 	*reg = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
+
+	return 0;
 }
 
 static int sev_cmd_buffer_len(int cmd)
@@ -133,12 +153,15 @@  static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
 	if (!psp)
 		return -ENODEV;
 
+	if (psp_dead)
+		return -EBUSY;
+
 	/* Get the physical address of the command buffer */
 	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
 	phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
 
-	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x\n",
-		cmd, phys_msb, phys_lsb);
+	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
+		cmd, phys_msb, phys_lsb, psp_timeout);
 
 	print_hex_dump_debug("(in):  ", DUMP_PREFIX_OFFSET, 16, 2, data,
 			     sev_cmd_buffer_len(cmd), false);
@@ -154,7 +177,18 @@  static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
 	iowrite32(reg, psp->io_regs + psp->vdata->cmdresp_reg);
 
 	/* wait for command completion */
-	sev_wait_cmd_ioc(psp, &reg);
+	ret = sev_wait_cmd_ioc(psp, &reg, psp_timeout);
+	if (ret) {
+		if (psp_ret)
+			*psp_ret = 0;
+
+		dev_err(psp->dev, "sev command %#x timed out, disabling PSP \n", cmd);
+		psp_dead = true;
+
+		return ret;
+	}
+
+	psp_timeout = psp_cmd_timeout;
 
 	if (psp_ret)
 		*psp_ret = reg & PSP_CMDRESP_ERR_MASK;
@@ -888,6 +922,8 @@  void psp_pci_init(void)
 
 	psp_master = sp->psp_data;
 
+	psp_timeout = psp_probe_timeout;
+
 	if (sev_get_api_version())
 		goto err;