diff mbox series

[07/12] fpga: sec-mgr: expose hardware error info

Message ID 20210517023200.52707-8-mdf@kernel.org (mailing list archive)
State New
Headers show
Series FPGA Security Manager for 5.14 | expand

Commit Message

Moritz Fischer May 17, 2021, 2:31 a.m. UTC
From: Russ Weight <russell.h.weight@intel.com>

Extend the FPGA Security Manager class driver to include
an optional update/hw_errinfo sysfs node that can be used
to retrieve 64 bits of device specific error information
following a secure update failure.

The underlying driver must provide a get_hw_errinfo() callback
function to enable this feature. This data is treated as
opaque by the class driver. It is left to user-space software
or support personnel to interpret this data.

Signed-off-by: Russ Weight <russell.h.weight@intel.com>
Reviewed-by: Tom Rix <trix@redhat.com>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 .../ABI/testing/sysfs-class-fpga-sec-mgr      | 14 +++++++
 drivers/fpga/fpga-sec-mgr.c                   | 38 +++++++++++++++++++
 include/linux/fpga/fpga-sec-mgr.h             |  5 +++
 3 files changed, 57 insertions(+)

Comments

Greg KH May 17, 2021, 7:10 a.m. UTC | #1
On Sun, May 16, 2021 at 07:31:55PM -0700, Moritz Fischer wrote:
> From: Russ Weight <russell.h.weight@intel.com>
> 
> Extend the FPGA Security Manager class driver to include
> an optional update/hw_errinfo sysfs node that can be used
> to retrieve 64 bits of device specific error information
> following a secure update failure.
> 
> The underlying driver must provide a get_hw_errinfo() callback
> function to enable this feature. This data is treated as
> opaque by the class driver. It is left to user-space software
> or support personnel to interpret this data.

No, you don't provide "opaque" data to userspace, that's a sure way to
make it such that no one knows what this data is supposed to be, and so
it can not be maintained at all.


> 
> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
> Reviewed-by: Tom Rix <trix@redhat.com>
> Signed-off-by: Moritz Fischer <mdf@kernel.org>
> ---
>  .../ABI/testing/sysfs-class-fpga-sec-mgr      | 14 +++++++
>  drivers/fpga/fpga-sec-mgr.c                   | 38 +++++++++++++++++++
>  include/linux/fpga/fpga-sec-mgr.h             |  5 +++
>  3 files changed, 57 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> index 749f2d4c78d3..f1881ce39c63 100644
> --- a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> +++ b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
> @@ -65,3 +65,17 @@ Description:	Read-only. Returns a string describing the failure
>  		idle state. If this file is read while a secure
>  		update is in progress, then the read will fail with
>  		EBUSY.
> +
> +What: 		/sys/class/fpga_sec_mgr/fpga_secX/update/hw_errinfo
> +Date:		June 2021
> +KernelVersion:	5.14
> +Contact:	Russ Weight <russell.h.weight@intel.com>
> +Description:	Read-only. Returns a 64 bit error value providing
> +		hardware specific information that may be useful in
> +		debugging errors that occur during FPGA image updates.
> +		This file is only visible if the underlying device
> +		supports it. The hw_errinfo value is only accessible
> +		when the secure update engine is in the idle state.
> +		If this file is read while a secure update is in
> +		progress, then the read will fail with EBUSY.
> +		Format: "0x%llx".
> diff --git a/drivers/fpga/fpga-sec-mgr.c b/drivers/fpga/fpga-sec-mgr.c
> index 48950843c2b4..cefe9182c3c3 100644
> --- a/drivers/fpga/fpga-sec-mgr.c
> +++ b/drivers/fpga/fpga-sec-mgr.c
> @@ -36,10 +36,17 @@ static void fpga_sec_set_error(struct fpga_sec_mgr *smgr, enum fpga_sec_err err_
>  	smgr->err_code = err_code;
>  }
>  
> +static void fpga_sec_set_hw_errinfo(struct fpga_sec_mgr *smgr)
> +{
> +	if (smgr->sops->get_hw_errinfo)
> +		smgr->hw_errinfo = smgr->sops->get_hw_errinfo(smgr);
> +}
> +
>  static void fpga_sec_dev_error(struct fpga_sec_mgr *smgr,
>  			       enum fpga_sec_err err_code)
>  {
>  	fpga_sec_set_error(smgr, err_code);
> +	fpga_sec_set_hw_errinfo(smgr);
>  	smgr->sops->cancel(smgr);
>  }
>  
> @@ -221,6 +228,23 @@ error_show(struct device *dev, struct device_attribute *attr, char *buf)
>  }
>  static DEVICE_ATTR_RO(error);
>  
> +static ssize_t
> +hw_errinfo_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> +	struct fpga_sec_mgr *smgr = to_sec_mgr(dev);
> +	int ret;
> +
> +	mutex_lock(&smgr->lock);
> +	if (smgr->progress != FPGA_SEC_PROG_IDLE)
> +		ret = -EBUSY;

Why does the state matter here?

greg k-h
Russ Weight May 17, 2021, 7:49 p.m. UTC | #2
On 5/17/21 12:10 AM, Greg KH wrote:
> On Sun, May 16, 2021 at 07:31:55PM -0700, Moritz Fischer wrote:
>> From: Russ Weight <russell.h.weight@intel.com>
>>
>> Extend the FPGA Security Manager class driver to include
>> an optional update/hw_errinfo sysfs node that can be used
>> to retrieve 64 bits of device specific error information
>> following a secure update failure.
>>
>> The underlying driver must provide a get_hw_errinfo() callback
>> function to enable this feature. This data is treated as
>> opaque by the class driver. It is left to user-space software
>> or support personnel to interpret this data.
> No, you don't provide "opaque" data to userspace, that's a sure way to
> make it such that no one knows what this data is supposed to be, and so
> it can not be maintained at all.
My intent was that the data be opaque to the class driver layer, but not
to the parent driver or to the user-space code (which could process the
data based on the device type).

If this is not appropriate, it is an optional feature and can be removed.

>> Signed-off-by: Russ Weight <russell.h.weight@intel.com>
>> Reviewed-by: Tom Rix <trix@redhat.com>
>> Signed-off-by: Moritz Fischer <mdf@kernel.org>
>> ---
>>  .../ABI/testing/sysfs-class-fpga-sec-mgr      | 14 +++++++
>>  drivers/fpga/fpga-sec-mgr.c                   | 38 +++++++++++++++++++
>>  include/linux/fpga/fpga-sec-mgr.h             |  5 +++
>>  3 files changed, 57 insertions(+)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
>> index 749f2d4c78d3..f1881ce39c63 100644
>> --- a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
>> +++ b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
>> @@ -65,3 +65,17 @@ Description:	Read-only. Returns a string describing the failure
>>  		idle state. If this file is read while a secure
>>  		update is in progress, then the read will fail with
>>  		EBUSY.
>> +
>> +What: 		/sys/class/fpga_sec_mgr/fpga_secX/update/hw_errinfo
>> +Date:		June 2021
>> +KernelVersion:	5.14
>> +Contact:	Russ Weight <russell.h.weight@intel.com>
>> +Description:	Read-only. Returns a 64 bit error value providing
>> +		hardware specific information that may be useful in
>> +		debugging errors that occur during FPGA image updates.
>> +		This file is only visible if the underlying device
>> +		supports it. The hw_errinfo value is only accessible
>> +		when the secure update engine is in the idle state.
>> +		If this file is read while a secure update is in
>> +		progress, then the read will fail with EBUSY.
>> +		Format: "0x%llx".
>> diff --git a/drivers/fpga/fpga-sec-mgr.c b/drivers/fpga/fpga-sec-mgr.c
>> index 48950843c2b4..cefe9182c3c3 100644
>> --- a/drivers/fpga/fpga-sec-mgr.c
>> +++ b/drivers/fpga/fpga-sec-mgr.c
>> @@ -36,10 +36,17 @@ static void fpga_sec_set_error(struct fpga_sec_mgr *smgr, enum fpga_sec_err err_
>>  	smgr->err_code = err_code;
>>  }
>>  
>> +static void fpga_sec_set_hw_errinfo(struct fpga_sec_mgr *smgr)
>> +{
>> +	if (smgr->sops->get_hw_errinfo)
>> +		smgr->hw_errinfo = smgr->sops->get_hw_errinfo(smgr);
>> +}
>> +
>>  static void fpga_sec_dev_error(struct fpga_sec_mgr *smgr,
>>  			       enum fpga_sec_err err_code)
>>  {
>>  	fpga_sec_set_error(smgr, err_code);
>> +	fpga_sec_set_hw_errinfo(smgr);
>>  	smgr->sops->cancel(smgr);
>>  }
>>  
>> @@ -221,6 +228,23 @@ error_show(struct device *dev, struct device_attribute *attr, char *buf)
>>  }
>>  static DEVICE_ATTR_RO(error);
>>  
>> +static ssize_t
>> +hw_errinfo_show(struct device *dev, struct device_attribute *attr, char *buf)
>> +{
>> +	struct fpga_sec_mgr *smgr = to_sec_mgr(dev);
>> +	int ret;
>> +
>> +	mutex_lock(&smgr->lock);
>> +	if (smgr->progress != FPGA_SEC_PROG_IDLE)
>> +		ret = -EBUSY;
> Why does the state matter here?
The transfer and validation of image data can take up to 40 minutes for the N3000
acceleration card, so the transfer of data and the monitoring of the update are
happening in a worker thread. The hw_errinfo data is only valid when the update
engine is in the IDLE state. Returning EBUSY if the update is still in progress
simplifies the locking model.

- Russ

>
> greg k-h
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
index 749f2d4c78d3..f1881ce39c63 100644
--- a/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
+++ b/Documentation/ABI/testing/sysfs-class-fpga-sec-mgr
@@ -65,3 +65,17 @@  Description:	Read-only. Returns a string describing the failure
 		idle state. If this file is read while a secure
 		update is in progress, then the read will fail with
 		EBUSY.
+
+What: 		/sys/class/fpga_sec_mgr/fpga_secX/update/hw_errinfo
+Date:		June 2021
+KernelVersion:	5.14
+Contact:	Russ Weight <russell.h.weight@intel.com>
+Description:	Read-only. Returns a 64 bit error value providing
+		hardware specific information that may be useful in
+		debugging errors that occur during FPGA image updates.
+		This file is only visible if the underlying device
+		supports it. The hw_errinfo value is only accessible
+		when the secure update engine is in the idle state.
+		If this file is read while a secure update is in
+		progress, then the read will fail with EBUSY.
+		Format: "0x%llx".
diff --git a/drivers/fpga/fpga-sec-mgr.c b/drivers/fpga/fpga-sec-mgr.c
index 48950843c2b4..cefe9182c3c3 100644
--- a/drivers/fpga/fpga-sec-mgr.c
+++ b/drivers/fpga/fpga-sec-mgr.c
@@ -36,10 +36,17 @@  static void fpga_sec_set_error(struct fpga_sec_mgr *smgr, enum fpga_sec_err err_
 	smgr->err_code = err_code;
 }
 
+static void fpga_sec_set_hw_errinfo(struct fpga_sec_mgr *smgr)
+{
+	if (smgr->sops->get_hw_errinfo)
+		smgr->hw_errinfo = smgr->sops->get_hw_errinfo(smgr);
+}
+
 static void fpga_sec_dev_error(struct fpga_sec_mgr *smgr,
 			       enum fpga_sec_err err_code)
 {
 	fpga_sec_set_error(smgr, err_code);
+	fpga_sec_set_hw_errinfo(smgr);
 	smgr->sops->cancel(smgr);
 }
 
@@ -221,6 +228,23 @@  error_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 static DEVICE_ATTR_RO(error);
 
+static ssize_t
+hw_errinfo_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct fpga_sec_mgr *smgr = to_sec_mgr(dev);
+	int ret;
+
+	mutex_lock(&smgr->lock);
+	if (smgr->progress != FPGA_SEC_PROG_IDLE)
+		ret = -EBUSY;
+	else
+		ret = sysfs_emit(buf, "0x%llx\n", smgr->hw_errinfo);
+	mutex_unlock(&smgr->lock);
+
+	return ret;
+}
+static DEVICE_ATTR_RO(hw_errinfo);
+
 static ssize_t remaining_size_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -252,6 +276,7 @@  static ssize_t filename_store(struct device *dev, struct device_attribute *attr,
 	}
 
 	smgr->err_code = FPGA_SEC_ERR_NONE;
+	smgr->hw_errinfo = 0;
 	smgr->request_cancel = false;
 	smgr->progress = FPGA_SEC_PROG_READING;
 	reinit_completion(&smgr->update_done);
@@ -286,18 +311,31 @@  static ssize_t cancel_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_WO(cancel);
 
+static umode_t
+sec_mgr_update_visible(struct kobject *kobj, struct attribute *attr, int n)
+{
+	struct fpga_sec_mgr *smgr = to_sec_mgr(kobj_to_dev(kobj));
+
+	if (attr == &dev_attr_hw_errinfo.attr && !smgr->sops->get_hw_errinfo)
+		return 0;
+
+	return attr->mode;
+}
+
 static struct attribute *sec_mgr_update_attrs[] = {
 	&dev_attr_filename.attr,
 	&dev_attr_cancel.attr,
 	&dev_attr_status.attr,
 	&dev_attr_error.attr,
 	&dev_attr_remaining_size.attr,
+	&dev_attr_hw_errinfo.attr,
 	NULL,
 };
 
 static struct attribute_group sec_mgr_update_attr_group = {
 	.name = "update",
 	.attrs = sec_mgr_update_attrs,
+	.is_visible = sec_mgr_update_visible,
 };
 
 static ssize_t name_show(struct device *dev,
diff --git a/include/linux/fpga/fpga-sec-mgr.h b/include/linux/fpga/fpga-sec-mgr.h
index 0e1f50434024..a99bfd28f38c 100644
--- a/include/linux/fpga/fpga-sec-mgr.h
+++ b/include/linux/fpga/fpga-sec-mgr.h
@@ -40,6 +40,9 @@  enum fpga_sec_err {
  *			    function and is called at the completion
  *			    of the update, whether success or failure,
  *			    if the prepare function succeeded.
+ * @get_hw_errinfo:	    Optional: Return u64 hw specific error info.
+ *			    The software err_code may used to determine
+ *			    whether the hw error info is applicable.
  */
 struct fpga_sec_mgr_ops {
 	enum fpga_sec_err (*prepare)(struct fpga_sec_mgr *smgr);
@@ -47,6 +50,7 @@  struct fpga_sec_mgr_ops {
 	enum fpga_sec_err (*poll_complete)(struct fpga_sec_mgr *smgr);
 	enum fpga_sec_err (*cancel)(struct fpga_sec_mgr *smgr);
 	void (*cleanup)(struct fpga_sec_mgr *smgr);
+	u64 (*get_hw_errinfo)(struct fpga_sec_mgr *smgr);
 };
 
 /* Update progress codes */
@@ -72,6 +76,7 @@  struct fpga_sec_mgr {
 	enum fpga_sec_prog progress;
 	enum fpga_sec_prog err_state;	/* progress state at time of failure */
 	enum fpga_sec_err err_code;	/* security manager error code */
+	u64 hw_errinfo;			/* 64 bits of HW specific error info */
 	bool request_cancel;
 	bool driver_unload;
 	void *priv;