diff mbox series

[v3,17/27] powerpc/powernv/pmem: Implement the Read Error Log command

Message ID 20200221032720.33893-18-alastair@au1.ibm.com (mailing list archive)
State New, archived
Headers show
Series Add support for OpenCAPI Persistent Memory devices | expand

Commit Message

Alastair D'Silva Feb. 21, 2020, 3:27 a.m. UTC
From: Alastair D'Silva <alastair@d-silva.org>

The read error log command extracts information from the controller's
internal error log.

This patch exposes this information in 2 ways:
- During probe, if an error occurs & a log is available, print it to the
  console
- After probe, make the error log available to userspace via an IOCTL.
  Userspace is notified of pending error logs in a later patch
  ("powerpc/powernv/pmem: Forward events to userspace")

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 arch/powerpc/platforms/powernv/pmem/ocxl.c    | 269 ++++++++++++++++++
 .../platforms/powernv/pmem/ocxl_internal.h    |   1 +
 include/uapi/nvdimm/ocxl-pmem.h               |  46 +++
 3 files changed, 316 insertions(+)
 create mode 100644 include/uapi/nvdimm/ocxl-pmem.h

Comments

Frederic Barrat March 3, 2020, 10:36 a.m. UTC | #1
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> The read error log command extracts information from the controller's
> internal error log.
> 
> This patch exposes this information in 2 ways:
> - During probe, if an error occurs & a log is available, print it to the
>    console
> - After probe, make the error log available to userspace via an IOCTL.
>    Userspace is notified of pending error logs in a later patch
>    ("powerpc/powernv/pmem: Forward events to userspace")
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 269 ++++++++++++++++++
>   .../platforms/powernv/pmem/ocxl_internal.h    |   1 +
>   include/uapi/nvdimm/ocxl-pmem.h               |  46 +++
>   3 files changed, 316 insertions(+)
>   create mode 100644 include/uapi/nvdimm/ocxl-pmem.h
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 63109a870d2c..2b64504f9129 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, struct file *file)
>   	return 0;
>   }
>   
> +/**
> + * error_log_header_parse() - Parse the first 64 bits of the error log command response
> + * @ocxlpmem: the device metadata
> + * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
> + */
> +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
> +{
> +	int rc;
> +	u64 val;
> +


Empty line in the middle of declarations


> +	u16 data_identifier;
> +	u32 data_length;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		return rc;
> +
> +	data_identifier = val >> 48;
> +	data_length = val & 0xFFFF;
> +
> +	if (data_identifier != 0x454C) { // 'EL'
> +		dev_err(&ocxlpmem->dev,
> +			"Bad data identifier for error log data, expected 'EL', got '%2s' (%#x), data_length=%u\n",
> +			(char *)&data_identifier,
> +			(unsigned int)data_identifier, data_length);
> +		return -EINVAL;
> +	}
> +
> +	*length = data_length;
> +	return 0;
> +}
> +
> +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem,
> +				 u32 *log_identifier, u32 *program_ref_code)
> +{
> +	int rc;
> +	u64 val;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x08,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		return rc;
> +
> +	*log_identifier = val >> 32;
> +	*program_ref_code = val & 0xFFFFFFFF;
> +
> +	return 0;
> +}
> +
> +static int read_error_log(struct ocxlpmem *ocxlpmem,
> +			  struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user)
> +{
> +	u64 val;
> +	u16 user_buf_length;
> +	u16 buf_length;
> +	u16 i;
> +	int rc;
> +
> +	if (log->buf_size % 8)
> +		return -EINVAL;
> +
> +	rc = ocxlpmem_chi(ocxlpmem, &val);
> +	if (rc)
> +		goto out;



"out" will unlock a mutex not yet taken.



> +
> +	if (!(val & GLOBAL_MMIO_CHI_ELA))
> +		return -EAGAIN;
> +
> +	user_buf_length = log->buf_size;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG);
> +	if (rc < 0) {
> +		dev_warn(&ocxlpmem->dev, "Read error log timed out\n");
> +		goto out;
> +	}
> +
> +	rc = admin_response(ocxlpmem);
> +	if (rc < 0)
> +		goto out;
> +	if (rc != STATUS_SUCCESS) {
> +		warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc);
> +		goto out;
> +	}
> +
> +
> +	rc = error_log_header_parse(ocxlpmem, &log->buf_size);
> +	if (rc)
> +		goto out;
> +	// log->buf_size now contains the returned buffer size, not the user size
> +
> +	rc = error_log_offset_0x08(ocxlpmem, &log->log_identifier,
> +				       &log->program_reference_code);
> +	if (rc)
> +		goto out;



Offset 0x08 gets preferential treatment compared to 0x10 below, and
it's not clear why.
I would create a subfunction which parses all the fields linearly.



> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x10,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		goto out;
> +
> +	log->error_log_type = val >> 56;
> +	log->action_flags = (log->error_log_type == OCXL_PMEM_ERROR_LOG_TYPE_GENERAL) ?
> +			    (val >> 32) & 0xFFFFFF : 0;
> +	log->power_on_seconds = val & 0xFFFFFFFF;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x18,
> +				     OCXL_LITTLE_ENDIAN, &log->timestamp);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x20,
> +				     OCXL_HOST_ENDIAN, &log->wwid[0]);



A bit of a moot point, but is there a reason why some of those MMIO ops 
use OCXL_LITTLE_ENDIAN and the others OCXL_HOST_ENDIAN?



> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x28,
> +				     OCXL_HOST_ENDIAN, &log->wwid[1]);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x30,
> +				     OCXL_HOST_ENDIAN, (u64 *)log->fw_revision);
> +	if (rc)
> +		goto out;
> +	log->fw_revision[8] = '\0';
> +
> +	buf_length = (user_buf_length < log->buf_size) ?
> +		     user_buf_length : log->buf_size;
> +	for (i = 0; i < buf_length + 0x48; i += 8) {
> +		u64 val;
> +
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     ocxlpmem->admin_command.data_offset + i,
> +					     OCXL_HOST_ENDIAN, &val);
> +		if (rc)
> +			goto out;
> +
> +		if (buf_is_user) {
> +			if (copy_to_user(&log->buf[i], &val, sizeof(u64))) {
> +				rc = -EFAULT;
> +				goto out;
> +			}
> +		} else
> +			log->buf[i] = val;
> +	}



I think it could be a bit simplified by keeping the handling of the user 
buffer out of this function. Always call it with a kernel buffer. And 
have only one copy_to_user() call on the ioctl() path. You'd need to 
allocate a kernel buf on the ioctl path, but you're already doing it on 
the probe() path, so it should be doable to share code.



> +
> +	rc = admin_response_handled(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +
> +}
> +
> +static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
> +		struct ioctl_ocxl_pmem_error_log __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_error_log args;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	rc = read_error_log(ocxlpmem, &args, true);
> +	if (rc)
> +		return rc;
> +
> +	if (copy_to_user(uarg, &args, sizeof(args)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> +{
> +	struct ocxlpmem *ocxlpmem = file->private_data;
> +	int rc = -EINVAL;
> +
> +	switch (cmd) {
> +	case IOCTL_OCXL_PMEM_ERROR_LOG:
> +		rc = ioctl_error_log(ocxlpmem,
> +				     (struct ioctl_ocxl_pmem_error_log __user *)args);
> +		break;
> +	}
> +	return rc;
> +}
> +
>   static const struct file_operations fops = {
>   	.owner		= THIS_MODULE,
>   	.open		= file_open,
>   	.release	= file_release,
> +	.unlocked_ioctl = file_ioctl,
> +	.compat_ioctl   = file_ioctl,
>   };
>   
>   /**
> @@ -527,6 +736,60 @@ static int read_device_metadata(struct ocxlpmem *ocxlpmem)
>   	return 0;
>   }
>   
> +static const char *decode_error_log_type(u8 error_log_type)
> +{
> +	switch (error_log_type) {
> +	case 0x00:
> +		return "general";
> +	case 0x01:
> +		return "predictive failure";
> +	case 0x02:
> +		return "thermal warning";
> +	case 0x03:
> +		return "data loss";
> +	case 0x04:
> +		return "health & performance";
> +	default:
> +		return "unknown";
> +	}
> +}
> +
> +static void dump_error_log(struct ocxlpmem *ocxlpmem)
> +{
> +	struct ioctl_ocxl_pmem_error_log log;
> +	u32 buf_size;
> +	u8 *buf;
> +	int rc;
> +
> +	if (ocxlpmem->admin_command.data_size == 0)
> +		return;
> +
> +	buf_size = ocxlpmem->admin_command.data_size - 0x48;
> +	buf = kzalloc(buf_size, GFP_KERNEL);
> +	if (!buf)
> +		return;
> +
> +	log.buf = buf;
> +	log.buf_size = buf_size;
> +
> +	rc = read_error_log(ocxlpmem, &log, false);
> +	if (rc < 0)
> +		goto out;
> +
> +	dev_warn(&ocxlpmem->dev,
> +		 "OCXL PMEM Error log: WWID=0x%016llx%016llx LID=0x%x PRC=%x type=0x%x %s, Uptime=%u seconds timestamp=0x%llx\n",
> +		 log.wwid[0], log.wwid[1],
> +		 log.log_identifier, log.program_reference_code,
> +		 log.error_log_type,
> +		 decode_error_log_type(log.error_log_type),
> +		 log.power_on_seconds, log.timestamp);
> +	print_hex_dump(KERN_WARNING, "buf", DUMP_PREFIX_OFFSET, 16, 1, buf,
> +		       log.buf_size, false);


dev_warn already logs a warning. Isn't KERN_DEBUG more appropriate for 
the hex dump?



> +
> +out:
> +	kfree(buf);
> +}
> +
>   /**
>    * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
>    * This is important as it enables templates higher than 0 across all other functions,
> @@ -568,6 +831,7 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	struct ocxlpmem *ocxlpmem;
>   	int rc;
>   	u16 elapsed, timeout;
> +	u64 chi;
>   
>   	if (PCI_FUNC(pdev->devfn) == 0)
>   		return probe_function0(pdev);
> @@ -667,6 +931,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	return 0;
>   
>   err:
> +	if (ocxlpmem &&
> +		    (ocxlpmem_chi(ocxlpmem, &chi) == 0) &&
> +		    (chi & GLOBAL_MMIO_CHI_ELA))
> +		dump_error_log(ocxlpmem);
> +
>   	/*
>   	 * Further cleanup is done in the release handler via free_ocxlpmem()
>   	 * This allows us to keep the character device live to handle IOCTLs to
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index d2d81fec7bb1..b953ee522ed4 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -5,6 +5,7 @@
>   #include <linux/cdev.h>
>   #include <misc/ocxl.h>
>   #include <linux/libnvdimm.h>
> +#include <uapi/nvdimm/ocxl-pmem.h>


Can't we limit the extra include to ocxl.c?

Completely unrelated, but ocxl.c contains most of the code for this 
driver. We should consider renaming it to ocxlpmem.c or something along 
those lines, since it does a lot more than just interfacing with the 
OpenCAPI interface. It would also avoid confusion with another, already 
existing ocxl.c file.



>   #include <linux/mm.h>
>   
>   #define LABEL_AREA_SIZE	(1UL << PA_SECTION_SHIFT)
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> new file mode 100644
> index 000000000000..b10f8ac0c20f
> --- /dev/null
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/* Copyright 2017 IBM Corp. */
> +#ifndef _UAPI_OCXL_SCM_H
> +#define _UAPI_OCXL_SCM_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_DUMP		(1 << (55-32))
> +
> +#define OCXL_PMEM_ERROR_LOG_TYPE_GENERAL		(0x00)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_PREDICTIVE_FAILURE	(0x01)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_THERMAL_WARNING	(0x02)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_DATA_LOSS		(0x03)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_HEALTH_PERFORMANCE	(0x04)
> +
> +struct ioctl_ocxl_pmem_error_log {
> +	__u32 log_identifier; /* out */
> +	__u32 program_reference_code; /* out */
> +	__u32 action_flags; /* out, recommended course of action */
> +	__u32 power_on_seconds; /* out, Number of seconds the controller has been on when the error occurred */
> +	__u64 timestamp; /* out, relative time since the current IPL */
> +	__u64 wwid[2]; /* out, the NAA formatted WWID associated with the controller */
> +	char  fw_revision[8+1]; /* out, firmware revision as null terminated text */


The 8+1 size will make the compiler add some padding here. Are we 
confident that all the compilers, at least on powerpc, will do the same 
thing and we can guarantee a kernel ABI? I would play it safe and have a 
discussion with folks who understand compilers better.



> +	__u16 buf_size; /* in/out, buffer size provided/required.
> +			 * If required is greater than provided, the buffer
> +			 * will be truncated to the amount provided. If its
> +			 * less, then only the required bytes will be populated.
> +			 * If it is 0, then there are no more error log entries.
> +			 */
> +	__u8  error_log_type;
> +	__u8  reserved1;
> +	__u32 reserved2;
> +	__u64 reserved3[2];
> +	__u8 *buf; /* pointer to output buffer */
> +};
> +
> +/* ioctl numbers */
> +#define OCXL_PMEM_MAGIC 0x5C


Randomly picked?
See (and add entry in) Documentation/userspace-api/ioctl/ioctl-number.rst


   Fred



> +/* SCM devices */
> +#define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> +
> +#endif /* _UAPI_OCXL_SCM_H */
>
Andrew Donnellan March 4, 2020, 5:58 a.m. UTC | #2
On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> The read error log command extracts information from the controller's
> internal error log.
> 
> This patch exposes this information in 2 ways:
> - During probe, if an error occurs & a log is available, print it to the
>    console
> - After probe, make the error log available to userspace via an IOCTL.
>    Userspace is notified of pending error logs in a later patch
>    ("powerpc/powernv/pmem: Forward events to userspace")
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>

A few minor style checks at 
https://openpower.xyz/job/snowpatch/job/snowpatch-linux-checkpatch/11787//artifact/linux/checkpatch.log

We should also add some documentation for the user interfaces we're 
adding (same applies for all the remaining patches in this series that 
add more interfaces).

> ---
>   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 269 ++++++++++++++++++
>   .../platforms/powernv/pmem/ocxl_internal.h    |   1 +
>   include/uapi/nvdimm/ocxl-pmem.h               |  46 +++
>   3 files changed, 316 insertions(+)
>   create mode 100644 include/uapi/nvdimm/ocxl-pmem.h
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 63109a870d2c..2b64504f9129 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -447,10 +447,219 @@ static int file_release(struct inode *inode, struct file *file)
>   	return 0;
>   }
>   
> +/**
> + * error_log_header_parse() - Parse the first 64 bits of the error log command response
> + * @ocxlpmem: the device metadata
> + * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
> + */
> +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
> +{
> +	int rc;
> +	u64 val;
> +
> +	u16 data_identifier;
> +	u32 data_length;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		return rc;
> +
> +	data_identifier = val >> 48;
> +	data_length = val & 0xFFFF;
> +
> +	if (data_identifier != 0x454C) { // 'EL'
> +		dev_err(&ocxlpmem->dev,
> +			"Bad data identifier for error log data, expected 'EL', got '%2s' (%#x), data_length=%u\n",
> +			(char *)&data_identifier,
> +			(unsigned int)data_identifier, data_length);
> +		return -EINVAL;

This should be something other than EINVAL I think

> +	}
> +
> +	*length = data_length;
> +	return 0;
> +}
> +
> +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem,
> +				 u32 *log_identifier, u32 *program_ref_code)
> +{
> +	int rc;
> +	u64 val;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x08,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		return rc;
> +
> +	*log_identifier = val >> 32;
> +	*program_ref_code = val & 0xFFFFFFFF;
> +
> +	return 0;
> +}
> +
> +static int read_error_log(struct ocxlpmem *ocxlpmem,
> +			  struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user)
> +{
> +	u64 val;
> +	u16 user_buf_length;
> +	u16 buf_length;
> +	u16 i;
> +	int rc;
> +
> +	if (log->buf_size % 8)
> +		return -EINVAL;
> +
> +	rc = ocxlpmem_chi(ocxlpmem, &val);
> +	if (rc)
> +		goto out;
> +
> +	if (!(val & GLOBAL_MMIO_CHI_ELA))
> +		return -EAGAIN;
> +
> +	user_buf_length = log->buf_size;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG);
> +	if (rc < 0) {
> +		dev_warn(&ocxlpmem->dev, "Read error log timed out\n");
> +		goto out;
> +	}
> +
> +	rc = admin_response(ocxlpmem);
> +	if (rc < 0)
> +		goto out;
> +	if (rc != STATUS_SUCCESS) {
> +		warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc);
> +		goto out;
> +	}
> +
> +
> +	rc = error_log_header_parse(ocxlpmem, &log->buf_size);
> +	if (rc)
> +		goto out;
> +	// log->buf_size now contains the returned buffer size, not the user size

In the event that the log is truncated to fit the user buffer, we return 
the full log size. I assume this is intentional, to signal that it's 
truncated, as per the nd stuff?

> +
> +	rc = error_log_offset_0x08(ocxlpmem, &log->log_identifier,
> +				       &log->program_reference_code);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x10,
> +				     OCXL_LITTLE_ENDIAN, &val);
> +	if (rc)
> +		goto out;
> +
> +	log->error_log_type = val >> 56;
> +	log->action_flags = (log->error_log_type == OCXL_PMEM_ERROR_LOG_TYPE_GENERAL) ?
> +			    (val >> 32) & 0xFFFFFF : 0;
> +	log->power_on_seconds = val & 0xFFFFFFFF;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x18,
> +				     OCXL_LITTLE_ENDIAN, &log->timestamp);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x20,
> +				     OCXL_HOST_ENDIAN, &log->wwid[0]);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x28,
> +				     OCXL_HOST_ENDIAN, &log->wwid[1]);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +				     ocxlpmem->admin_command.data_offset + 0x30,
> +				     OCXL_HOST_ENDIAN, (u64 *)log->fw_revision);

Why the difference between HOST and LITTLE_ENDIAN between these fields?

> +	if (rc)
> +		goto out;
> +	log->fw_revision[8] = '\0';
> +
> +	buf_length = (user_buf_length < log->buf_size) ?
> +		     user_buf_length : log->buf_size;
> +	for (i = 0; i < buf_length + 0x48; i += 8) {

+ 0x48 here doesn't look right...

> +		u64 val;
> +
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     ocxlpmem->admin_command.data_offset + i,

...did you mean to add 0x48 here?

> +					     OCXL_HOST_ENDIAN, &val);
> +		if (rc)
> +			goto out;
> +
> +		if (buf_is_user) {
> +			if (copy_to_user(&log->buf[i], &val, sizeof(u64))) {
> +				rc = -EFAULT;
> +				goto out;
> +			}
> +		} else
> +			log->buf[i] = val;

Please use braces consistently on both sides of if/else.

> +	}
> +
> +	rc = admin_response_handled(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +
> +}
> +
> +static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
> +		struct ioctl_ocxl_pmem_error_log __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_error_log args;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	rc = read_error_log(ocxlpmem, &args, true);
> +	if (rc)
> +		return rc;
> +
> +	if (copy_to_user(uarg, &args, sizeof(args)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
> +{
> +	struct ocxlpmem *ocxlpmem = file->private_data;
> +	int rc = -EINVAL;
> +
> +	switch (cmd) {
> +	case IOCTL_OCXL_PMEM_ERROR_LOG:
> +		rc = ioctl_error_log(ocxlpmem,
> +				     (struct ioctl_ocxl_pmem_error_log __user *)args);
> +		break;
> +	}
> +	return rc;
> +}
> +
>   static const struct file_operations fops = {
>   	.owner		= THIS_MODULE,
>   	.open		= file_open,
>   	.release	= file_release,
> +	.unlocked_ioctl = file_ioctl,
> +	.compat_ioctl   = file_ioctl,
>   };
>   
>   /**
> @@ -527,6 +736,60 @@ static int read_device_metadata(struct ocxlpmem *ocxlpmem)
>   	return 0;
>   }
>   
> +static const char *decode_error_log_type(u8 error_log_type)
> +{
> +	switch (error_log_type) {
> +	case 0x00:
> +		return "general";
> +	case 0x01:
> +		return "predictive failure";
> +	case 0x02:
> +		return "thermal warning";
> +	case 0x03:
> +		return "data loss";
> +	case 0x04:
> +		return "health & performance";
> +	default:
> +		return "unknown";
> +	}
> +}
> +
> +static void dump_error_log(struct ocxlpmem *ocxlpmem)
> +{
> +	struct ioctl_ocxl_pmem_error_log log;
> +	u32 buf_size;
> +	u8 *buf;
> +	int rc;
> +
> +	if (ocxlpmem->admin_command.data_size == 0)
> +		return;
> +
> +	buf_size = ocxlpmem->admin_command.data_size - 0x48;
> +	buf = kzalloc(buf_size, GFP_KERNEL);
> +	if (!buf)
> +		return;
> +
> +	log.buf = buf;
> +	log.buf_size = buf_size;
> +
> +	rc = read_error_log(ocxlpmem, &log, false);
> +	if (rc < 0)
> +		goto out;
> +
> +	dev_warn(&ocxlpmem->dev,
> +		 "OCXL PMEM Error log: WWID=0x%016llx%016llx LID=0x%x PRC=%x type=0x%x %s, Uptime=%u seconds timestamp=0x%llx\n",
> +		 log.wwid[0], log.wwid[1],
> +		 log.log_identifier, log.program_reference_code,
> +		 log.error_log_type,
> +		 decode_error_log_type(log.error_log_type),
> +		 log.power_on_seconds, log.timestamp);
> +	print_hex_dump(KERN_WARNING, "buf", DUMP_PREFIX_OFFSET, 16, 1, buf,
> +		       log.buf_size, false);
> +
> +out:
> +	kfree(buf);
> +}
> +
>   /**
>    * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
>    * This is important as it enables templates higher than 0 across all other functions,
> @@ -568,6 +831,7 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	struct ocxlpmem *ocxlpmem;
>   	int rc;
>   	u16 elapsed, timeout;
> +	u64 chi;
>   
>   	if (PCI_FUNC(pdev->devfn) == 0)
>   		return probe_function0(pdev);
> @@ -667,6 +931,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	return 0;
>   
>   err:
> +	if (ocxlpmem &&
> +		    (ocxlpmem_chi(ocxlpmem, &chi) == 0) &&
> +		    (chi & GLOBAL_MMIO_CHI_ELA))
> +		dump_error_log(ocxlpmem);
> +
>   	/*
>   	 * Further cleanup is done in the release handler via free_ocxlpmem()
>   	 * This allows us to keep the character device live to handle IOCTLs to
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index d2d81fec7bb1..b953ee522ed4 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -5,6 +5,7 @@
>   #include <linux/cdev.h>
>   #include <misc/ocxl.h>
>   #include <linux/libnvdimm.h>
> +#include <uapi/nvdimm/ocxl-pmem.h>
>   #include <linux/mm.h>
>   
>   #define LABEL_AREA_SIZE	(1UL << PA_SECTION_SHIFT)
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> new file mode 100644
> index 000000000000..b10f8ac0c20f
> --- /dev/null
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -0,0 +1,46 @@
> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> +/* Copyright 2017 IBM Corp. */
> +#ifndef _UAPI_OCXL_SCM_H
> +#define _UAPI_OCXL_SCM_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
> +#define OCXL_PMEM_ERROR_LOG_ACTION_DUMP		(1 << (55-32))
> +
> +#define OCXL_PMEM_ERROR_LOG_TYPE_GENERAL		(0x00)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_PREDICTIVE_FAILURE	(0x01)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_THERMAL_WARNING	(0x02)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_DATA_LOSS		(0x03)
> +#define OCXL_PMEM_ERROR_LOG_TYPE_HEALTH_PERFORMANCE	(0x04)
> +
> +struct ioctl_ocxl_pmem_error_log {
> +	__u32 log_identifier; /* out */
> +	__u32 program_reference_code; /* out */
> +	__u32 action_flags; /* out, recommended course of action */
> +	__u32 power_on_seconds; /* out, Number of seconds the controller has been on when the error occurred */
> +	__u64 timestamp; /* out, relative time since the current IPL */
> +	__u64 wwid[2]; /* out, the NAA formatted WWID associated with the controller */
> +	char  fw_revision[8+1]; /* out, firmware revision as null terminated text */
> +	__u16 buf_size; /* in/out, buffer size provided/required.
> +			 * If required is greater than provided, the buffer
> +			 * will be truncated to the amount provided. If its
> +			 * less, then only the required bytes will be populated.
> +			 * If it is 0, then there are no more error log entries.
> +			 */
> +	__u8  error_log_type;
> +	__u8  reserved1;
> +	__u32 reserved2;
> +	__u64 reserved3[2];
> +	__u8 *buf; /* pointer to output buffer */
> +};
> +
> +/* ioctl numbers */
> +#define OCXL_PMEM_MAGIC 0x5C
> +/* SCM devices */
> +#define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> +
> +#endif /* _UAPI_OCXL_SCM_H */
>
Alastair D'Silva March 5, 2020, 4:31 a.m. UTC | #3
On Tue, 2020-03-03 at 11:36 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > The read error log command extracts information from the
> > controller's
> > internal error log.
> > 
> > This patch exposes this information in 2 ways:
> > - During probe, if an error occurs & a log is available, print it
> > to the
> >    console
> > - After probe, make the error log available to userspace via an
> > IOCTL.
> >    Userspace is notified of pending error logs in a later patch
> >    ("powerpc/powernv/pmem: Forward events to userspace")
> > 
> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 269
> > ++++++++++++++++++
> >   .../platforms/powernv/pmem/ocxl_internal.h    |   1 +
> >   include/uapi/nvdimm/ocxl-pmem.h               |  46 +++
> >   3 files changed, 316 insertions(+)
> >   create mode 100644 include/uapi/nvdimm/ocxl-pmem.h
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 63109a870d2c..2b64504f9129 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -447,10 +447,219 @@ static int file_release(struct inode *inode,
> > struct file *file)
> >   	return 0;
> >   }
> >   
> > +/**
> > + * error_log_header_parse() - Parse the first 64 bits of the error
> > log command response
> > + * @ocxlpmem: the device metadata
> > + * @length: out, returns the number of bytes in the response
> > (excluding the 64 bit header)
> > + */
> > +static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16
> > *length)
> > +{
> > +	int rc;
> > +	u64 val;
> > +
> 
> Empty line in the middle of declarations
> 

Ok

> 
> > +	u16 data_identifier;
> > +	u32 data_length;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset,
> > +				     OCXL_LITTLE_ENDIAN, &val);
> > +	if (rc)
> > +		return rc;
> > +
> > +	data_identifier = val >> 48;
> > +	data_length = val & 0xFFFF;
> > +
> > +	if (data_identifier != 0x454C) { // 'EL'
> > +		dev_err(&ocxlpmem->dev,
> > +			"Bad data identifier for error log data,
> > expected 'EL', got '%2s' (%#x), data_length=%u\n",
> > +			(char *)&data_identifier,
> > +			(unsigned int)data_identifier, data_length);
> > +		return -EINVAL;
> > +	}
> > +
> > +	*length = data_length;
> > +	return 0;
> > +}
> > +
> > +static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem,
> > +				 u32 *log_identifier, u32
> > *program_ref_code)
> > +{
> > +	int rc;
> > +	u64 val;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x08,
> > +				     OCXL_LITTLE_ENDIAN, &val);
> > +	if (rc)
> > +		return rc;
> > +
> > +	*log_identifier = val >> 32;
> > +	*program_ref_code = val & 0xFFFFFFFF;
> > +
> > +	return 0;
> > +}
> > +
> > +static int read_error_log(struct ocxlpmem *ocxlpmem,
> > +			  struct ioctl_ocxl_pmem_error_log *log, bool
> > buf_is_user)
> > +{
> > +	u64 val;
> > +	u16 user_buf_length;
> > +	u16 buf_length;
> > +	u16 i;
> > +	int rc;
> > +
> > +	if (log->buf_size % 8)
> > +		return -EINVAL;
> > +
> > +	rc = ocxlpmem_chi(ocxlpmem, &val);
> > +	if (rc)
> > +		goto out;
> 
> 
> "out" will unlock a mutex not yet taken.
> 

Thanks, that should have been a return.

> 
> 
> > +
> > +	if (!(val & GLOBAL_MMIO_CHI_ELA))
> > +		return -EAGAIN;
> > +
> > +	user_buf_length = log->buf_size;
> > +
> > +	mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_execute(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_complete_timeout(ocxlpmem,
> > ADMIN_COMMAND_ERRLOG);
> > +	if (rc < 0) {
> > +		dev_warn(&ocxlpmem->dev, "Read error log timed out\n");
> > +		goto out;
> > +	}
> > +
> > +	rc = admin_response(ocxlpmem);
> > +	if (rc < 0)
> > +		goto out;
> > +	if (rc != STATUS_SUCCESS) {
> > +		warn_status(ocxlpmem, "Unexpected status from retrieve
> > error log", rc);
> > +		goto out;
> > +	}
> > +
> > +
> > +	rc = error_log_header_parse(ocxlpmem, &log->buf_size);
> > +	if (rc)
> > +		goto out;
> > +	// log->buf_size now contains the returned buffer size, not the
> > user size
> > +
> > +	rc = error_log_offset_0x08(ocxlpmem, &log->log_identifier,
> > +				       &log->program_reference_code);
> > +	if (rc)
> > +		goto out;
> 
> 
> Offset 0x08 gets preferential treatment compared to 0x10 below, and
> it's not clear why.
> I would create a subfunction which parses all the fields linearly.
> 

I'll inline the contents of error_log_offset_0x08() - I can't see a big
benefit to factoring out the guts of that function.

> 
> 
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x10,
> > +				     OCXL_LITTLE_ENDIAN, &val);
> > +	if (rc)
> > +		goto out;
> > +
> > +	log->error_log_type = val >> 56;
> > +	log->action_flags = (log->error_log_type ==
> > OCXL_PMEM_ERROR_LOG_TYPE_GENERAL) ?
> > +			    (val >> 32) & 0xFFFFFF : 0;
> > +	log->power_on_seconds = val & 0xFFFFFFFF;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x18,
> > +				     OCXL_LITTLE_ENDIAN, &log-
> > >timestamp);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x20,
> > +				     OCXL_HOST_ENDIAN, &log->wwid[0]);
> 
> 
> A bit of a moot point, but is there a reason why some of those MMIO
> ops 
> use OCXL_LITTLE_ENDIAN and the others OCXL_HOST_ENDIAN?
> 

Some are little endian values, and some are binary data. WWIDs should
be LE though.

> 
> 
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x28,
> > +				     OCXL_HOST_ENDIAN, &log->wwid[1]);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +				     ocxlpmem-
> > >admin_command.data_offset + 0x30,
> > +				     OCXL_HOST_ENDIAN, (u64 *)log-
> > >fw_revision);
> > +	if (rc)
> > +		goto out;
> > +	log->fw_revision[8] = '\0';
> > +
> > +	buf_length = (user_buf_length < log->buf_size) ?
> > +		     user_buf_length : log->buf_size;
> > +	for (i = 0; i < buf_length + 0x48; i += 8) {
> > +		u64 val;
> > +
> > +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +					     ocxlpmem-
> > >admin_command.data_offset + i,
> > +					     OCXL_HOST_ENDIAN, &val);
> > +		if (rc)
> > +			goto out;
> > +
> > +		if (buf_is_user) {
> > +			if (copy_to_user(&log->buf[i], &val,
> > sizeof(u64))) {
> > +				rc = -EFAULT;
> > +				goto out;
> > +			}
> > +		} else
> > +			log->buf[i] = val;
> > +	}
> 
> 
> I think it could be a bit simplified by keeping the handling of the
> user 
> buffer out of this function. Always call it with a kernel buffer.
> And 
> have only one copy_to_user() call on the ioctl() path. You'd need to 
> allocate a kernel buf on the ioctl path, but you're already doing it
> on 
> the probe() path, so it should be doable to share code.

Hmm, the problem then is that on the IOCTL side, I'll have to save,
modify, then restore the buf member of struct
ioctl_ocxl_pmem_error_log, which would be uglier.

> 
> 
> > +
> > +	rc = admin_response_handled(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +out:
> > +	mutex_unlock(&ocxlpmem->admin_command.lock);
> > +	return rc;
> > +
> > +}
> > +
> > +static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
> > +		struct ioctl_ocxl_pmem_error_log __user *uarg)
> > +{
> > +	struct ioctl_ocxl_pmem_error_log args;
> > +	int rc;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	rc = read_error_log(ocxlpmem, &args, true);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (copy_to_user(uarg, &args, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	return 0;
> > +}
> > +
> > +static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> > +{
> > +	struct ocxlpmem *ocxlpmem = file->private_data;
> > +	int rc = -EINVAL;
> > +
> > +	switch (cmd) {
> > +	case IOCTL_OCXL_PMEM_ERROR_LOG:
> > +		rc = ioctl_error_log(ocxlpmem,
> > +				     (struct ioctl_ocxl_pmem_error_log
> > __user *)args);
> > +		break;
> > +	}
> > +	return rc;
> > +}
> > +
> >   static const struct file_operations fops = {
> >   	.owner		= THIS_MODULE,
> >   	.open		= file_open,
> >   	.release	= file_release,
> > +	.unlocked_ioctl = file_ioctl,
> > +	.compat_ioctl   = file_ioctl,
> >   };
> >   
> >   /**
> > @@ -527,6 +736,60 @@ static int read_device_metadata(struct
> > ocxlpmem *ocxlpmem)
> >   	return 0;
> >   }
> >   
> > +static const char *decode_error_log_type(u8 error_log_type)
> > +{
> > +	switch (error_log_type) {
> > +	case 0x00:
> > +		return "general";
> > +	case 0x01:
> > +		return "predictive failure";
> > +	case 0x02:
> > +		return "thermal warning";
> > +	case 0x03:
> > +		return "data loss";
> > +	case 0x04:
> > +		return "health & performance";
> > +	default:
> > +		return "unknown";
> > +	}
> > +}
> > +
> > +static void dump_error_log(struct ocxlpmem *ocxlpmem)
> > +{
> > +	struct ioctl_ocxl_pmem_error_log log;
> > +	u32 buf_size;
> > +	u8 *buf;
> > +	int rc;
> > +
> > +	if (ocxlpmem->admin_command.data_size == 0)
> > +		return;
> > +
> > +	buf_size = ocxlpmem->admin_command.data_size - 0x48;
> > +	buf = kzalloc(buf_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return;
> > +
> > +	log.buf = buf;
> > +	log.buf_size = buf_size;
> > +
> > +	rc = read_error_log(ocxlpmem, &log, false);
> > +	if (rc < 0)
> > +		goto out;
> > +
> > +	dev_warn(&ocxlpmem->dev,
> > +		 "OCXL PMEM Error log: WWID=0x%016llx%016llx LID=0x%x
> > PRC=%x type=0x%x %s, Uptime=%u seconds timestamp=0x%llx\n",
> > +		 log.wwid[0], log.wwid[1],
> > +		 log.log_identifier, log.program_reference_code,
> > +		 log.error_log_type,
> > +		 decode_error_log_type(log.error_log_type),
> > +		 log.power_on_seconds, log.timestamp);
> > +	print_hex_dump(KERN_WARNING, "buf", DUMP_PREFIX_OFFSET, 16, 1,
> > buf,
> > +		       log.buf_size, false);
> 
> dev_warn already logs a warning. Isn't KERN_DEBUG more appropriate
> for 
> the hex dump?
> 
> 

The hex dump is associated binary data for the warning, it doesn't
replicate the contents of the message.

> 
> > +
> > +out:
> > +	kfree(buf);
> > +}
> > +
> >   /**
> >    * probe_function0() - Set up function 0 for an OpenCAPI
> > persistent memory device
> >    * This is important as it enables templates higher than 0 across
> > all other functions,
> > @@ -568,6 +831,7 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> >   	struct ocxlpmem *ocxlpmem;
> >   	int rc;
> >   	u16 elapsed, timeout;
> > +	u64 chi;
> >   
> >   	if (PCI_FUNC(pdev->devfn) == 0)
> >   		return probe_function0(pdev);
> > @@ -667,6 +931,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> >   	return 0;
> >   
> >   err:
> > +	if (ocxlpmem &&
> > +		    (ocxlpmem_chi(ocxlpmem, &chi) == 0) &&
> > +		    (chi & GLOBAL_MMIO_CHI_ELA))
> > +		dump_error_log(ocxlpmem);
> > +
> >   	/*
> >   	 * Further cleanup is done in the release handler via
> > free_ocxlpmem()
> >   	 * This allows us to keep the character device live to handle
> > IOCTLs to
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index d2d81fec7bb1..b953ee522ed4 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -5,6 +5,7 @@
> >   #include <linux/cdev.h>
> >   #include <misc/ocxl.h>
> >   #include <linux/libnvdimm.h>
> > +#include <uapi/nvdimm/ocxl-pmem.h>
> 
> Can't we limit the extra include to ocxl.c?
> 

Yes, there are no consumers referred to in ocxl_internal.[hc]

> Completely unrelated, but ocxl.c contains most of the code for this 
> driver. We should consider renaming it to ocxlpmem.c or something
> along 
> those lines, since it does a lot more than just interfacing with the 
> opencapi interface. And would avoid confusion with another already 
> existing ocxl.c file.
> 

Ok, my thinking was that it's already in a pmem directory, but I can
see arguments both ways.

> 
> >   #include <linux/mm.h>
> >   
> >   #define LABEL_AREA_SIZE	(1UL << PA_SECTION_SHIFT)
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > new file mode 100644
> > index 000000000000..b10f8ac0c20f
> > --- /dev/null
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -0,0 +1,46 @@
> > +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
> > +/* Copyright 2017 IBM Corp. */
> > +#ifndef _UAPI_OCXL_SCM_H
> > +#define _UAPI_OCXL_SCM_H
> > +
> > +#include <linux/types.h>
> > +#include <linux/ioctl.h>
> > +
> > +#define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
> > +#define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
> > +#define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
> > +#define OCXL_PMEM_ERROR_LOG_ACTION_DUMP		(1 << (55-32))
> > +
> > +#define OCXL_PMEM_ERROR_LOG_TYPE_GENERAL		(0x00)
> > +#define OCXL_PMEM_ERROR_LOG_TYPE_PREDICTIVE_FAILURE	(0x01)
> > +#define OCXL_PMEM_ERROR_LOG_TYPE_THERMAL_WARNING	(0x02)
> > +#define OCXL_PMEM_ERROR_LOG_TYPE_DATA_LOSS		(0x03)
> > +#define OCXL_PMEM_ERROR_LOG_TYPE_HEALTH_PERFORMANCE	(0x04)
> > +
> > +struct ioctl_ocxl_pmem_error_log {
> > +	__u32 log_identifier; /* out */
> > +	__u32 program_reference_code; /* out */
> > +	__u32 action_flags; /* out, recommended course of action */
> > +	__u32 power_on_seconds; /* out, Number of seconds the
> > controller has been on when the error occurred */
> > +	__u64 timestamp; /* out, relative time since the current IPL */
> > +	__u64 wwid[2]; /* out, the NAA formatted WWID associated with
> > the controller */
> > +	char  fw_revision[8+1]; /* out, firmware revision as null
> > terminated text */
> 
> The 8+1 size will make the compiler add some padding here. Are we 
> confident that all the compilers, at least on powerpc, will do the
> same 
> thing and we can guarantee a kernel ABI? I would play it safe and
> have a 
> discussion with folks who understand compilers better.
> 

I'll add some explicit padding.

> 
> 
> > +	__u16 buf_size; /* in/out, buffer size provided/required.
> > +			 * If required is greater than provided, the
> > buffer
> > +			 * will be truncated to the amount provided. If
> > its
> > +			 * less, then only the required bytes will be
> > populated.
> > +			 * If it is 0, then there are no more error log
> > entries.
> > +			 */
> > +	__u8  error_log_type;
> > +	__u8  reserved1;
> > +	__u32 reserved2;
> > +	__u64 reserved3[2];
> > +	__u8 *buf; /* pointer to output buffer */
> > +};
> > +
> > +/* ioctl numbers */
> > +#define OCXL_PMEM_MAGIC 0x5C
> 
> Randomly picked?
> See (and add entry in) Documentation/userspace-api/ioctl/ioctl-
> number.rst
> 
Ok

> 
>    Fred
> 
> 
> 
> > +/* SCM devices */
> > +#define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM
> > _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> > +
> > +#endif /* _UAPI_OCXL_SCM_H */
> >
Frederic Barrat March 5, 2020, 9:33 a.m. UTC | #4
>>> +	if (rc)
>>> +		goto out;
>>> +
>>> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
>>> +				     ocxlpmem-
>>>> admin_command.data_offset + 0x28,
>>> +				     OCXL_HOST_ENDIAN, &log->wwid[1]);
>>> +	if (rc)
>>> +		goto out;
>>> +
>>> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
>>> +				     ocxlpmem-
>>>> admin_command.data_offset + 0x30,
>>> +				     OCXL_HOST_ENDIAN, (u64 *)log-
>>>> fw_revision);
>>> +	if (rc)
>>> +		goto out;
>>> +	log->fw_revision[8] = '\0';
>>> +
>>> +	buf_length = (user_buf_length < log->buf_size) ?
>>> +		     user_buf_length : log->buf_size;
>>> +	for (i = 0; i < buf_length + 0x48; i += 8) {
>>> +		u64 val;
>>> +
>>> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
>>> +					     ocxlpmem-
>>>> admin_command.data_offset + i,
>>> +					     OCXL_HOST_ENDIAN, &val);
>>> +		if (rc)
>>> +			goto out;
>>> +
>>> +		if (buf_is_user) {
>>> +			if (copy_to_user(&log->buf[i], &val,
>>> sizeof(u64))) {
>>> +				rc = -EFAULT;
>>> +				goto out;
>>> +			}
>>> +		} else
>>> +			log->buf[i] = val;
>>> +	}
>>
>>
>> I think it could be a bit simplified by keeping the handling of the
>> user
>> buffer out of this function. Always call it with a kernel buffer.
>> And
>> have only one copy_to_user() call on the ioctl() path. You'd need to
>> allocate a kernel buf on the ioctl path, but you're already doing it
>> on
>> the probe() path, so it should be doable to share code.
> 
> Hmm, the problem then is that on the IOCTL side, I'll have to save,
> modify, then restore the buf member of struct
> ioctl_ocxl_pmem_error_log, which would be uglier.


buf is just an output buffer. All you'd need to do is allocate a kernel 
buf, like it's already done for the "probe" case in dump_error_log(). 
And add a global copy_to_user() of the buf at the end of the ioctl path, 
instead of having multiple smaller copy_to_user() in the loop here.
copy_to_user() is a bit expensive so it's usually better to regroup 
them. I think it's easy here and makes sense since that function is also 
trying to handle both kernel and user space buffers.
But we're not in a critical path, and after this patch, there are others 
copying out mmio content to user buffers and those don't have a kernel 
buffer to handle, so the copy_to_user() in a loop makes things easier.
So I guess the conclusion is whatever you think is the easiest...



>>
>>
>>> +
>>> +	rc = admin_response_handled(ocxlpmem);
>>> +	if (rc)
>>> +		goto out;
>>> +
>>> +out:
>>> +	mutex_unlock(&ocxlpmem->admin_command.lock);
>>> +	return rc;
>>> +
>>> +}
>>> +
>>> +static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
>>> +		struct ioctl_ocxl_pmem_error_log __user *uarg)
>>> +{
>>> +	struct ioctl_ocxl_pmem_error_log args;
>>> +	int rc;
>>> +
>>> +	if (copy_from_user(&args, uarg, sizeof(args)))
>>> +		return -EFAULT;
>>> +
>>> +	rc = read_error_log(ocxlpmem, &args, true);
>>> +	if (rc)
>>> +		return rc;
>>> +
>>> +	if (copy_to_user(uarg, &args, sizeof(args)))
>>> +		return -EFAULT;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static long file_ioctl(struct file *file, unsigned int cmd,
>>> unsigned long args)
>>> +{
>>> +	struct ocxlpmem *ocxlpmem = file->private_data;
>>> +	int rc = -EINVAL;
>>> +
>>> +	switch (cmd) {
>>> +	case IOCTL_OCXL_PMEM_ERROR_LOG:
>>> +		rc = ioctl_error_log(ocxlpmem,
>>> +				     (struct ioctl_ocxl_pmem_error_log
>>> __user *)args);
>>> +		break;
>>> +	}
>>> +	return rc;
>>> +}
>>> +
>>>    static const struct file_operations fops = {
>>>    	.owner		= THIS_MODULE,
>>>    	.open		= file_open,
>>>    	.release	= file_release,
>>> +	.unlocked_ioctl = file_ioctl,
>>> +	.compat_ioctl   = file_ioctl,
>>>    };
>>>    
>>>    /**
>>> @@ -527,6 +736,60 @@ static int read_device_metadata(struct
>>> ocxlpmem *ocxlpmem)
>>>    	return 0;
>>>    }
>>>    
>>> +static const char *decode_error_log_type(u8 error_log_type)
>>> +{
>>> +	switch (error_log_type) {
>>> +	case 0x00:
>>> +		return "general";
>>> +	case 0x01:
>>> +		return "predictive failure";
>>> +	case 0x02:
>>> +		return "thermal warning";
>>> +	case 0x03:
>>> +		return "data loss";
>>> +	case 0x04:
>>> +		return "health & performance";
>>> +	default:
>>> +		return "unknown";
>>> +	}
>>> +}
>>> +
>>> +static void dump_error_log(struct ocxlpmem *ocxlpmem)
>>> +{
>>> +	struct ioctl_ocxl_pmem_error_log log;
>>> +	u32 buf_size;
>>> +	u8 *buf;
>>> +	int rc;
>>> +
>>> +	if (ocxlpmem->admin_command.data_size == 0)
>>> +		return;
>>> +
>>> +	buf_size = ocxlpmem->admin_command.data_size - 0x48;
>>> +	buf = kzalloc(buf_size, GFP_KERNEL);
>>> +	if (!buf)
>>> +		return;
>>> +
>>> +	log.buf = buf;
>>> +	log.buf_size = buf_size;
>>> +
>>> +	rc = read_error_log(ocxlpmem, &log, false);
>>> +	if (rc < 0)
>>> +		goto out;
>>> +
>>> +	dev_warn(&ocxlpmem->dev,
>>> +		 "OCXL PMEM Error log: WWID=0x%016llx%016llx LID=0x%x
>>> PRC=%x type=0x%x %s, Uptime=%u seconds timestamp=0x%llx\n",
>>> +		 log.wwid[0], log.wwid[1],
>>> +		 log.log_identifier, log.program_reference_code,
>>> +		 log.error_log_type,
>>> +		 decode_error_log_type(log.error_log_type),
>>> +		 log.power_on_seconds, log.timestamp);
>>> +	print_hex_dump(KERN_WARNING, "buf", DUMP_PREFIX_OFFSET, 16, 1,
>>> buf,
>>> +		       log.buf_size, false);
>>
>> dev_warn already logs a warning. Isn't KERN_DEBUG more appropriate
>> for
>> the hex dump?
>>
>>
> 
> The hex dump is associated binary data for the warning, it doesn't
> replicate the contents of the message.


My point is not about duplicating, it's about exposing a hexadecimal 
dump where it makes sense. Those DEBUG and WARNING tags are used for 
filtering content. For example to know what to display on the console. A 
warning to mention that a device hits a serious error is perfectly fine. 
A hexadecimal dump which is going to be meaningless to most everybody is 
not. The system is not crashing, so it's not like the console is our 
last hope. I think the dump is debug data and should be tagged as such.

   Fred



>>
>>> +
>>> +out:
>>> +	kfree(buf);
>>> +}
>>> +
>>>    /**
>>>     * probe_function0() - Set up function 0 for an OpenCAPI
>>> persistent memory device
>>>     * This is important as it enables templates higher than 0 across
>>> all other functions,
>>> @@ -568,6 +831,7 @@ static int probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>>    	struct ocxlpmem *ocxlpmem;
>>>    	int rc;
>>>    	u16 elapsed, timeout;
>>> +	u64 chi;
>>>    
>>>    	if (PCI_FUNC(pdev->devfn) == 0)
>>>    		return probe_function0(pdev);
>>> @@ -667,6 +931,11 @@ static int probe(struct pci_dev *pdev, const
>>> struct pci_device_id *ent)
>>>    	return 0;
>>>    
>>>    err:
>>> +	if (ocxlpmem &&
>>> +		    (ocxlpmem_chi(ocxlpmem, &chi) == 0) &&
>>> +		    (chi & GLOBAL_MMIO_CHI_ELA))
>>> +		dump_error_log(ocxlpmem);
>>> +
>>>    	/*
>>>    	 * Further cleanup is done in the release handler via
>>> free_ocxlpmem()
>>>    	 * This allows us to keep the character device live to handle
>>> IOCTLs to
>>> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
>>> b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
>>> index d2d81fec7bb1..b953ee522ed4 100644
>>> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
>>> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
>>> @@ -5,6 +5,7 @@
>>>    #include <linux/cdev.h>
>>>    #include <misc/ocxl.h>
>>>    #include <linux/libnvdimm.h>
>>> +#include <uapi/nvdimm/ocxl-pmem.h>
>>
>> Can't we limit the extra include to ocxl.c?
>>
> 
> Yes, there are no consumers referred to in ocxl_internal.[hc]
> 
>> Completely unrelated, but ocxl.c contains most of the code for this
>> driver. We should consider renaming it to ocxlpmem.c or something
>> along
>> those lines, since it does a lot more than just interfacing with the
>> opencapi interface. And would avoid confusion with another already
>> existing ocxl.c file.
>>
> 
> Ok, my thinking was that it's already in a pmem directory, but I can
> see arguments both ways.
> 
>>
>>>    #include <linux/mm.h>
>>>    
>>>    #define LABEL_AREA_SIZE	(1UL << PA_SECTION_SHIFT)
>>> diff --git a/include/uapi/nvdimm/ocxl-pmem.h
>>> b/include/uapi/nvdimm/ocxl-pmem.h
>>> new file mode 100644
>>> index 000000000000..b10f8ac0c20f
>>> --- /dev/null
>>> +++ b/include/uapi/nvdimm/ocxl-pmem.h
>>> @@ -0,0 +1,46 @@
>>> +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
>>> +/* Copyright 2017 IBM Corp. */
>>> +#ifndef _UAPI_OCXL_SCM_H
>>> +#define _UAPI_OCXL_SCM_H
>>> +
>>> +#include <linux/types.h>
>>> +#include <linux/ioctl.h>
>>> +
>>> +#define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
>>> +#define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
>>> +#define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
>>> +#define OCXL_PMEM_ERROR_LOG_ACTION_DUMP		(1 << (55-32))
>>> +
>>> +#define OCXL_PMEM_ERROR_LOG_TYPE_GENERAL		(0x00)
>>> +#define OCXL_PMEM_ERROR_LOG_TYPE_PREDICTIVE_FAILURE	(0x01)
>>> +#define OCXL_PMEM_ERROR_LOG_TYPE_THERMAL_WARNING	(0x02)
>>> +#define OCXL_PMEM_ERROR_LOG_TYPE_DATA_LOSS		(0x03)
>>> +#define OCXL_PMEM_ERROR_LOG_TYPE_HEALTH_PERFORMANCE	(0x04)
>>> +
>>> +struct ioctl_ocxl_pmem_error_log {
>>> +	__u32 log_identifier; /* out */
>>> +	__u32 program_reference_code; /* out */
>>> +	__u32 action_flags; /* out, recommended course of action */
>>> +	__u32 power_on_seconds; /* out, Number of seconds the
>>> controller has been on when the error occurred */
>>> +	__u64 timestamp; /* out, relative time since the current IPL */
>>> +	__u64 wwid[2]; /* out, the NAA formatted WWID associated with
>>> the controller */
>>> +	char  fw_revision[8+1]; /* out, firmware revision as null
>>> terminated text */
>>
>> The 8+1 size will make the compiler add some padding here. Are we
>> confident that all the compilers, at least on powerpc, will do the
>> same
>> thing and we can guarantee a kernel ABI? I would play it safe and
>> have a
>> discussion with folks who understand compilers better.
>>
> 
> I'll add some explicit padding.
> 
>>
>>
>>> +	__u16 buf_size; /* in/out, buffer size provided/required.
>>> +			 * If required is greater than provided, the
>>> buffer
>>> +			 * will be truncated to the amount provided. If
>>> its
>>> +			 * less, then only the required bytes will be
>>> populated.
>>> +			 * If it is 0, then there are no more error log
>>> entries.
>>> +			 */
>>> +	__u8  error_log_type;
>>> +	__u8  reserved1;
>>> +	__u32 reserved2;
>>> +	__u64 reserved3[2];
>>> +	__u8 *buf; /* pointer to output buffer */
>>> +};
>>> +
>>> +/* ioctl numbers */
>>> +#define OCXL_PMEM_MAGIC 0x5C
>>
>> Randomly picked?
>> See (and add entry in) Documentation/userspace-api/ioctl/ioctl-
>> number.rst
>>
> Ok
> 
>>
>>     Fred
>>
>>
>>
>>> +/* SCM devices */
>>> +#define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM
>>> _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
>>> +
>>> +#endif /* _UAPI_OCXL_SCM_H */
>>>
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 63109a870d2c..2b64504f9129 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -447,10 +447,219 @@  static int file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/**
+ * error_log_header_parse() - Parse the first 64 bits of the error log command response
+ * @ocxlpmem: the device metadata
+ * @length: out, returns the number of bytes in the response (excluding the 64 bit header)
+ *
+ * Return: 0 on success, -EINVAL on a bad data identifier, else the MMIO read error
+ */
+static int error_log_header_parse(struct ocxlpmem *ocxlpmem, u16 *length)
+{
+	u16 data_identifier;
+	u32 data_length;
+	u64 val;
+	int rc;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	data_identifier = val >> 48;
+	data_length = val & 0xFFFF;
+	if (data_identifier != 0x454C) { // 'EL'
+		dev_err(&ocxlpmem->dev,
+			"Bad data identifier for error log data, expected 'EL', got '%c%c' (%#x), data_length=%u\n",
+			data_identifier >> 8, data_identifier & 0xFF,
+			(unsigned int)data_identifier, data_length);
+		return -EINVAL;
+	}
+
+	*length = data_length;
+	return 0;
+}
+
+static int error_log_offset_0x08(struct ocxlpmem *ocxlpmem,
+				 u32 *log_identifier, u32 *program_ref_code)
+{
+	int rc;
+	u64 val;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x08,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		return rc;
+
+	*log_identifier = val >> 32;
+	*program_ref_code = val & 0xFFFFFFFF;
+
+	return 0;
+}
+
+/**
+ * read_error_log() - Retrieve one error log entry from the controller
+ * @ocxlpmem: the device metadata
+ * @log: in/out, caller's buf & buf_size on entry, parsed log fields on return
+ * @buf_is_user: true if log->buf is a userspace pointer, false if it is a kernel one
+ *
+ * Return: 0 on success, a negative errno, or a non-success admin response status
+ */
+static int read_error_log(struct ocxlpmem *ocxlpmem,
+			  struct ioctl_ocxl_pmem_error_log *log, bool buf_is_user)
+{
+	u16 user_buf_length = log->buf_size;
+	u16 buf_length;
+	u64 val;
+	u16 i;
+	int rc;
+
+	if (user_buf_length % 8)
+		return -EINVAL;
+
+	rc = ocxlpmem_chi(ocxlpmem, &val);
+	if (rc)
+		return rc; /* the lock is not held yet, so don't go via "out" */
+
+	if (!(val & GLOBAL_MMIO_CHI_ELA))
+		return -EAGAIN;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_ERRLOG);
+	if (rc)
+		goto out;
+
+	rc = admin_command_execute(ocxlpmem);
+	if (rc)
+		goto out;
+
+	rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_ERRLOG);
+	if (rc < 0) {
+		dev_warn(&ocxlpmem->dev, "Read error log timed out\n");
+		goto out;
+	}
+
+	rc = admin_response(ocxlpmem);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		warn_status(ocxlpmem, "Unexpected status from retrieve error log", rc);
+		goto out;
+	}
+
+	rc = error_log_header_parse(ocxlpmem, &log->buf_size);
+	if (rc)
+		goto out;
+	// log->buf_size now contains the returned buffer size, not the user size
+
+	rc = error_log_offset_0x08(ocxlpmem, &log->log_identifier,
+				   &log->program_reference_code);
+	if (rc)
+		goto out;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x10,
+				     OCXL_LITTLE_ENDIAN, &val);
+	if (rc)
+		goto out;
+
+	log->error_log_type = val >> 56;
+	log->action_flags = (log->error_log_type == OCXL_PMEM_ERROR_LOG_TYPE_GENERAL) ?
+			    (val >> 32) & 0xFFFFFF : 0;
+	log->power_on_seconds = val & 0xFFFFFFFF;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x18,
+				     OCXL_LITTLE_ENDIAN, &log->timestamp);
+	if (rc)
+		goto out;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x20,
+				     OCXL_HOST_ENDIAN, &log->wwid[0]);
+	if (rc)
+		goto out;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x28,
+				     OCXL_HOST_ENDIAN, &log->wwid[1]);
+	if (rc)
+		goto out;
+
+	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+				     ocxlpmem->admin_command.data_offset + 0x30,
+				     OCXL_HOST_ENDIAN, (u64 *)log->fw_revision);
+	if (rc)
+		goto out;
+	log->fw_revision[8] = '\0';
+
+	/* Copy no more than the caller's buffer holds. NOTE(review): confirm the source offset (0 vs 0x48) */
+	buf_length = (user_buf_length < log->buf_size) ?
+		     user_buf_length : log->buf_size;
+	for (i = 0; i < buf_length; i += 8) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset + i,
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+		if (buf_is_user) {
+			if (copy_to_user(&log->buf[i], &val, sizeof(u64))) {
+				rc = -EFAULT;
+				goto out;
+			}
+		} else {
+			*(u64 *)&log->buf[i] = val; /* store all 8 bytes, not just the low one */
+		}
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+/*
+ * Handle IOCTL_OCXL_PMEM_ERROR_LOG: copy the request in, read the log, copy the result out
+ */
+static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
+		struct ioctl_ocxl_pmem_error_log __user *uarg)
+{
+	struct ioctl_ocxl_pmem_error_log request;
+	int err;
+
+	if (copy_from_user(&request, uarg, sizeof(request)))
+		return -EFAULT;
+
+	err = read_error_log(ocxlpmem, &request, true);
+	if (err)
+		return err;
+
+	return copy_to_user(uarg, &request, sizeof(request)) ? -EFAULT : 0;
+}
+
+/* ioctl entry point for the character device; unknown commands yield -EINVAL */
+static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
+{
+	struct ocxlpmem *ocxlpmem = file->private_data;
+
+	switch (cmd) {
+	case IOCTL_OCXL_PMEM_ERROR_LOG:
+		return ioctl_error_log(ocxlpmem,
+				       (struct ioctl_ocxl_pmem_error_log __user *)args);
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct file_operations fops = {
 	.owner		= THIS_MODULE,
 	.open		= file_open,
 	.release	= file_release,
+	.unlocked_ioctl = file_ioctl,
+	.compat_ioctl   = file_ioctl,
 };
 
 /**
@@ -527,6 +736,60 @@  static int read_device_metadata(struct ocxlpmem *ocxlpmem)
 	return 0;
 }
 
+static const char *decode_error_log_type(u8 error_log_type)
+{
+	switch (error_log_type) {
+	case 0x00:
+		return "general";
+	case 0x01:
+		return "predictive failure";
+	case 0x02:
+		return "thermal warning";
+	case 0x03:
+		return "data loss";
+	case 0x04:
+		return "health & performance";
+	default:
+		return "unknown";
+	}
+}
+
+static void dump_error_log(struct ocxlpmem *ocxlpmem)
+{
+	struct ioctl_ocxl_pmem_error_log log;
+	u32 buf_size;
+	u8 *buf;
+	int rc;
+
+	if (ocxlpmem->admin_command.data_size == 0)
+		return;
+
+	buf_size = ocxlpmem->admin_command.data_size - 0x48;
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	log.buf = buf;
+	log.buf_size = buf_size;
+
+	rc = read_error_log(ocxlpmem, &log, false);
+	if (rc < 0)
+		goto out;
+
+	dev_warn(&ocxlpmem->dev,
+		 "OCXL PMEM Error log: WWID=0x%016llx%016llx LID=0x%x PRC=%x type=0x%x %s, Uptime=%u seconds timestamp=0x%llx\n",
+		 log.wwid[0], log.wwid[1],
+		 log.log_identifier, log.program_reference_code,
+		 log.error_log_type,
+		 decode_error_log_type(log.error_log_type),
+		 log.power_on_seconds, log.timestamp);
+	print_hex_dump(KERN_WARNING, "buf", DUMP_PREFIX_OFFSET, 16, 1, buf,
+		       log.buf_size, false);
+
+out:
+	kfree(buf);
+}
+
 /**
  * probe_function0() - Set up function 0 for an OpenCAPI persistent memory device
  * This is important as it enables templates higher than 0 across all other functions,
@@ -568,6 +831,7 @@  static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct ocxlpmem *ocxlpmem;
 	int rc;
 	u16 elapsed, timeout;
+	u64 chi;
 
 	if (PCI_FUNC(pdev->devfn) == 0)
 		return probe_function0(pdev);
@@ -667,6 +931,11 @@  static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	return 0;
 
 err:
+	if (ocxlpmem &&
+		    (ocxlpmem_chi(ocxlpmem, &chi) == 0) &&
+		    (chi & GLOBAL_MMIO_CHI_ELA))
+		dump_error_log(ocxlpmem);
+
 	/*
 	 * Further cleanup is done in the release handler via free_ocxlpmem()
 	 * This allows us to keep the character device live to handle IOCTLs to
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index d2d81fec7bb1..b953ee522ed4 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -5,6 +5,7 @@ 
 #include <linux/cdev.h>
 #include <misc/ocxl.h>
 #include <linux/libnvdimm.h>
+#include <uapi/nvdimm/ocxl-pmem.h>
 #include <linux/mm.h>
 
 #define LABEL_AREA_SIZE	(1UL << PA_SECTION_SHIFT)
diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
new file mode 100644
index 000000000000..b10f8ac0c20f
--- /dev/null
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2017 IBM Corp. */
+#ifndef _UAPI_OCXL_SCM_H
+#define _UAPI_OCXL_SCM_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
+#define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
+#define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
+#define OCXL_PMEM_ERROR_LOG_ACTION_DUMP		(1 << (55-32))
+
+#define OCXL_PMEM_ERROR_LOG_TYPE_GENERAL		(0x00)
+#define OCXL_PMEM_ERROR_LOG_TYPE_PREDICTIVE_FAILURE	(0x01)
+#define OCXL_PMEM_ERROR_LOG_TYPE_THERMAL_WARNING	(0x02)
+#define OCXL_PMEM_ERROR_LOG_TYPE_DATA_LOSS		(0x03)
+#define OCXL_PMEM_ERROR_LOG_TYPE_HEALTH_PERFORMANCE	(0x04)
+
+struct ioctl_ocxl_pmem_error_log { /* argument of IOCTL_OCXL_PMEM_ERROR_LOG */
+	__u32 log_identifier; /* out */
+	__u32 program_reference_code; /* out */
+	__u32 action_flags; /* out, recommended course of action */
+	__u32 power_on_seconds; /* out, Number of seconds the controller has been on when the error occurred */
+	__u64 timestamp; /* out, relative time since the current IPL */
+	__u64 wwid[2]; /* out, the NAA formatted WWID associated with the controller */
+	char  fw_revision[8+1]; /* out, firmware revision as null terminated text. NOTE(review): odd size, implicit padding follows */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * If required is greater than provided, the buffer
+			 * will be truncated to the amount provided. If it's
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there are no more error log entries.
+			 */
+	__u8  error_log_type; /* out, error log type (see OCXL_PMEM_ERROR_LOG_TYPE_*) */
+	__u8  reserved1;
+	__u32 reserved2;
+	__u64 reserved3[2];
+	__u8 *buf; /* in, pointer to the output buffer that buf_size describes */
+};
+
+/* ioctl numbers */
+#define OCXL_PMEM_MAGIC 0x5C
+/* SCM devices */
+#define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
+
+#endif /* _UAPI_OCXL_SCM_H */