[v3,18/27] powerpc/powernv/pmem: Add controller dump IOCTLs
diff mbox series

Message ID 20200221032720.33893-19-alastair@au1.ibm.com
State New
Headers show
Series
  • Add support for OpenCAPI Persistent Memory devices
Related show

Commit Message

Alastair D'Silva Feb. 21, 2020, 3:27 a.m. UTC
From: Alastair D'Silva <alastair@d-silva.org>

This patch adds IOCTLs to allow userspace to request & fetch dumps
of the internal controller state.

This is useful during debugging or when a fatal error on the controller
has occurred.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 +++++++++++++++++++++
 include/uapi/nvdimm/ocxl-pmem.h            |  15 +++
 2 files changed, 147 insertions(+)

Comments

Frederic Barrat March 3, 2020, 6:04 p.m. UTC | #1
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> This patch adds IOCTLs to allow userspace to request & fetch dumps
> of the internal controller state.
> 
> This is useful during debugging or when a fatal error on the controller
> has occurred.
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/platforms/powernv/pmem/ocxl.c | 132 +++++++++++++++++++++
>   include/uapi/nvdimm/ocxl-pmem.h            |  15 +++
>   2 files changed, 147 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index 2b64504f9129..2cabafe1fc58 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
>   	return 0;
>   }
>   
> +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> +		struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_controller_dump_data args;
> +	u16 i;
> +	u64 val;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	if (args.buf_size % 8)
> +		return -EINVAL;
> +
> +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> +		return -EINVAL;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
> +	if (rc)
> +		goto out;
> +
> +	val = ((u64)args.offset) << 32;
> +	val |= args.buf_size;
> +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +				      ocxlpmem->admin_command.request_offset + 0x08,
> +				      OCXL_LITTLE_ENDIAN, val);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem,
> +					    ADMIN_COMMAND_CONTROLLER_DUMP);
> +	if (rc < 0) {
> +		dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
> +		goto out;
> +	}
> +
> +	rc = admin_response(ocxlpmem);
> +	if (rc < 0)
> +		goto out;
> +	if (rc != STATUS_SUCCESS) {
> +		warn_status(ocxlpmem,
> +			    "Unexpected status from retrieve error log",
> +			    rc);
> +		goto out;
> +	}



It would help if there was a comment indicating how the 3 ioctls are 
used. My understanding is that the userland is:
- requesting the controller to prepare a state dump
- then one or more ioctls to fetch the data. The number of calls 
required to get the full state really depends on the size of the buffer 
passed by user
- a last ioctl to tell the controller that we're done, presumably to let 
it free some resources.


> +
> +	for (i = 0; i < args.buf_size; i += 8) {
> +		u64 val;
> +
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     ocxlpmem->admin_command.data_offset + i,
> +					     OCXL_HOST_ENDIAN, &val);
> +		if (rc)
> +			goto out;
> +
> +		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> +			rc = -EFAULT;
> +			goto out;
> +		}
> +	}
> +
> +	if (copy_to_user(uarg, &args, sizeof(args))) {
> +		rc = -EFAULT;
> +		goto out;
> +	}
> +
> +	rc = admin_response_handled(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +}
> +
> +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> +{
> +	int rc;
> +	u64 busy = 1;
> +
> +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
> +				    OCXL_LITTLE_ENDIAN,
> +				    GLOBAL_MMIO_CHI_CDA);
> +


rc is not checked here.


> +
> +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> +				    OCXL_LITTLE_ENDIAN,
> +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> +	if (rc)
> +		return rc;
> +
> +	while (busy) {
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     GLOBAL_MMIO_HCI,
> +					     OCXL_LITTLE_ENDIAN, &busy);
> +		if (rc)
> +			return rc;
> +
> +		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;


Setting 'busy' doesn't hurt, but it's not really useful, is it?

We should add some kind of timeout so that if the controller hits an 
issue, we don't spin in kernel space endlessly.



> +		cond_resched();
> +	}
> +
> +	return 0;
> +}
> +
> +static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
> +{
> +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> +				    OCXL_LITTLE_ENDIAN,
> +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
> +}
> +
>   static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
>   {
>   	struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
>   		rc = ioctl_error_log(ocxlpmem,
>   				     (struct ioctl_ocxl_pmem_error_log __user *)args);
>   		break;
> +
> +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
> +		rc = request_controller_dump(ocxlpmem);
> +		break;
> +
> +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
> +		rc = ioctl_controller_dump_data(ocxlpmem,
> +						(struct ioctl_ocxl_pmem_controller_dump_data __user *)args);
> +		break;
> +
> +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> +		rc = ioctl_controller_dump_complete(ocxlpmem);
> +		break;
>   	}
> +
>   	return rc;
>   }
>   
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index b10f8ac0c20f..d4d8512d03f7 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
>   	__u8 *buf; /* pointer to output buffer */
>   };
>   
> +struct ioctl_ocxl_pmem_controller_dump_data {
> +	__u8 *buf; /* pointer to output buffer */


We only support 64-bit user app on powerpc, but using a pointer type in 
a kernel ABI is unusual. We should use a know size like __u64.
(also applies to buf pointer in struct ioctl_ocxl_pmem_error_log from 
previous patch too)

The rest of the structure will also be padded by the compiler, which we 
should avoid.

    Fred



> +	__u16 buf_size; /* in/out, buffer size provided/required.
> +			 * If required is greater than provided, the buffer
> +			 * will be truncated to the amount provided. If its
> +			 * less, then only the required bytes will be populated.
> +			 * If it is 0, then there is no more dump data available.
> +			 */
> +	__u32 offset; /* in, Offset within the dump */
> +	__u64 reserved[8];
> +};
> +
>   /* ioctl numbers */
>   #define OCXL_PMEM_MAGIC 0x5C
>   /* SCM devices */
>   #define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP			_IO(OCXL_PMEM_MAGIC, 0x02)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE	_IO(OCXL_PMEM_MAGIC, 0x04)
>   
>   #endif /* _UAPI_OCXL_SCM_H */
>
Andrew Donnellan March 4, 2020, 6:53 a.m. UTC | #2
On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> +		struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_controller_dump_data args;
> +	u16 i;
> +	u64 val;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	if (args.buf_size % 8)
> +		return -EINVAL;
> +
> +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> +		return -EINVAL;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
> +	if (rc)
> +		goto out;
> +
> +	val = ((u64)args.offset) << 32;
> +	val |= args.buf_size;
> +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +				      ocxlpmem->admin_command.request_offset + 0x08,
> +				      OCXL_LITTLE_ENDIAN, val);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem,
> +					    ADMIN_COMMAND_CONTROLLER_DUMP);
> +	if (rc < 0) {
> +		dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
> +		goto out;
> +	}
> +
> +	rc = admin_response(ocxlpmem);
> +	if (rc < 0)
> +		goto out;
> +	if (rc != STATUS_SUCCESS) {
> +		warn_status(ocxlpmem,
> +			    "Unexpected status from retrieve error log",

Controller dump

> +			    rc);
> +		goto out;
> +	}
> +
> +	for (i = 0; i < args.buf_size; i += 8) {
> +		u64 val;
> +
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     ocxlpmem->admin_command.data_offset + i,
> +					     OCXL_HOST_ENDIAN, &val);

Is a controller dump something where we want to do endian swapping?

Any reason we're not doing the usual check of the data identifier, 
additional data length etc?

> +		if (rc)
> +			goto out;
> +
> +		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> +			rc = -EFAULT;
> +			goto out;
> +		}
> +	}
> +
> +	if (copy_to_user(uarg, &args, sizeof(args))) {
> +		rc = -EFAULT;
> +		goto out;
> +	}
> +
> +	rc = admin_response_handled(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +}
> +
> +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> +{
> +	int rc;
> +	u64 busy = 1;
> +
> +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
> +				    OCXL_LITTLE_ENDIAN,
> +				    GLOBAL_MMIO_CHI_CDA);

This return code is ignored

> +
> +
> +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> +				    OCXL_LITTLE_ENDIAN,
> +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> +	if (rc)
> +		return rc;
> +
> +	while (busy) {
> +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +					     GLOBAL_MMIO_HCI,
> +					     OCXL_LITTLE_ENDIAN, &busy);
> +		if (rc)
> +			return rc;
> +
> +		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> +		cond_resched();
> +	}
> +
> +	return 0;
> +}
Alastair D'Silva March 5, 2020, 11:37 p.m. UTC | #3
On Tue, 2020-03-03 at 19:04 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > This patch adds IOCTLs to allow userspace to request & fetch dumps
> > of the internal controller state.
> > 
> > This is useful during debugging or when a fatal error on the
> > controller
> > has occurred.
> > 
> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c | 132
> > +++++++++++++++++++++
> >   include/uapi/nvdimm/ocxl-pmem.h            |  15 +++
> >   2 files changed, 147 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index 2b64504f9129..2cabafe1fc58 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -640,6 +640,124 @@ static int ioctl_error_log(struct ocxlpmem
> > *ocxlpmem,
> >   	return 0;
> >   }
> >   
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > +		struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > +	struct ioctl_ocxl_pmem_controller_dump_data args;
> > +	u16 i;
> > +	u64 val;
> > +	int rc;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	if (args.buf_size % 8)
> > +		return -EINVAL;
> > +
> > +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +	rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > +	if (rc)
> > +		goto out;
> > +
> > +	val = ((u64)args.offset) << 32;
> > +	val |= args.buf_size;
> > +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +				      ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > +				      OCXL_LITTLE_ENDIAN, val);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_execute(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_complete_timeout(ocxlpmem,
> > +					    ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > +	if (rc < 0) {
> > +		dev_warn(&ocxlpmem->dev, "Controller dump timed
> > out\n");
> > +		goto out;
> > +	}
> > +
> > +	rc = admin_response(ocxlpmem);
> > +	if (rc < 0)
> > +		goto out;
> > +	if (rc != STATUS_SUCCESS) {
> > +		warn_status(ocxlpmem,
> > +			    "Unexpected status from retrieve error
> > log",
> > +			    rc);
> > +		goto out;
> > +	}
> 
> 
> It would help if there was a comment indicating how the 3 ioctls are 
> used. My understanding is that the userland is:
> - requesting the controller to prepare a state dump
> - then one or more ioctls to fetch the data. The number of calls 
> required to get the full state really depends on the size of the
> buffer 
> passed by user
> - a last ioctl to tell the controller that we're done, presumably to
> let 
> it free some resources.
> 

Ok, will add it to the blurb.
> 
> > +
> > +	for (i = 0; i < args.buf_size; i += 8) {
> > +		u64 val;
> > +
> > +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +					     ocxlpmem-
> > >admin_command.data_offset + i,
> > +					     OCXL_HOST_ENDIAN, &val);
> > +		if (rc)
> > +			goto out;
> > +
> > +		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> > +			rc = -EFAULT;
> > +			goto out;
> > +		}
> > +	}
> > +
> > +	if (copy_to_user(uarg, &args, sizeof(args))) {
> > +		rc = -EFAULT;
> > +		goto out;
> > +	}
> > +
> > +	rc = admin_response_handled(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +out:
> > +	mutex_unlock(&ocxlpmem->admin_command.lock);
> > +	return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > +	int rc;
> > +	u64 busy = 1;
> > +
> > +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > +				    OCXL_LITTLE_ENDIAN,
> > +				    GLOBAL_MMIO_CHI_CDA);
> > +
> 
> rc is not checked here.

Whoops

> 
> 
> > +
> > +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +				    OCXL_LITTLE_ENDIAN,
> > +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > +	if (rc)
> > +		return rc;
> > +
> > +	while (busy) {
> > +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +					     GLOBAL_MMIO_HCI,
> > +					     OCXL_LITTLE_ENDIAN,
> > &busy);
> > +		if (rc)
> > +			return rc;
> > +
> > +		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> 
> Setting 'busy' doesn't hurt, but it's not really useful, is it?
> 
> We should add some kind of timeout so that if the controller hits an 
> issue, we don't spin in kernel space endlessly.
> 
> 

Here we are polling the controller dump bit of the HCI register until
the controller clears it - that line is masking off the bits we don't
care about.

I'll talk to the firmware team about adding a timeout for that to the
spec so we know how long to wait for before giving up.

> 
> > +		cond_resched();
> > +	}
> > +
> > +	return 0;
> > +}

> > +
> > +static int ioctl_controller_dump_complete(struct ocxlpmem
> > *ocxlpmem)
> > +{
> > +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +				    OCXL_LITTLE_ENDIAN,
> > +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COL
> > LECTED);
> > +}
> > +
> >   static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> >   {
> >   	struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -650,7 +768,21 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> >   		rc = ioctl_error_log(ocxlpmem,
> >   				     (struct ioctl_ocxl_pmem_error_log
> > __user *)args);
> >   		break;
> > +
> > +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
> > +		rc = request_controller_dump(ocxlpmem);
> > +		break;
> > +
> > +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
> > +		rc = ioctl_controller_dump_data(ocxlpmem,
> > +						(struct
> > ioctl_ocxl_pmem_controller_dump_data __user *)args);
> > +		break;
> > +
> > +	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
> > +		rc = ioctl_controller_dump_complete(ocxlpmem);
> > +		break;
> >   	}
> > +
> >   	return rc;
> >   }
> >   
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index b10f8ac0c20f..d4d8512d03f7 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -38,9 +38,24 @@ struct ioctl_ocxl_pmem_error_log {
> >   	__u8 *buf; /* pointer to output buffer */
> >   };
> >   
> > +struct ioctl_ocxl_pmem_controller_dump_data {
> > +	__u8 *buf; /* pointer to output buffer */
> 
> We only support 64-bit user app on powerpc, but using a pointer type
> in 
> a kernel ABI is unusual. We should use a know size like __u64.
> (also applies to buf pointer in struct ioctl_ocxl_pmem_error_log
> from 
> previous patch too)
> 
> The rest of the structure will also be padded by the compiler, which
> we 
> should avoid.
> 
>     Fred
> 

Ok, I'll co-erce the pointers into a __u64.

> 
> 
> > +	__u16 buf_size; /* in/out, buffer size provided/required.
> > +			 * If required is greater than provided, the
> > buffer
> > +			 * will be truncated to the amount provided. If
> > its
> > +			 * less, then only the required bytes will be
> > populated.
> > +			 * If it is 0, then there is no more dump data
> > available.
> > +			 */
> > +	__u32 offset; /* in, Offset within the dump */
> > +	__u64 reserved[8];
> > +};
> > +
> >   /* ioctl numbers */
> >   #define OCXL_PMEM_MAGIC 0x5C
> >   /* SCM devices */
> >   #define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM
> > _MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP			_IO(OCX
> > L_PMEM_MAGIC, 0x02)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA		_IOWR(O
> > CXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
> > +#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE	_IO(OCXL_PMEM_M
> > AGIC, 0x04)
> >   
> >   #endif /* _UAPI_OCXL_SCM_H */
> >
Alastair D'Silva March 6, 2020, 3:34 a.m. UTC | #4
On Wed, 2020-03-04 at 17:53 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > +static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
> > +		struct ioctl_ocxl_pmem_controller_dump_data __user
> > *uarg)
> > +{
> > +	struct ioctl_ocxl_pmem_controller_dump_data args;
> > +	u16 i;
> > +	u64 val;
> > +	int rc;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	if (args.buf_size % 8)
> > +		return -EINVAL;
> > +
> > +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +	rc = admin_command_request(ocxlpmem,
> > ADMIN_COMMAND_CONTROLLER_DUMP);
> > +	if (rc)
> > +		goto out;
> > +
> > +	val = ((u64)args.offset) << 32;
> > +	val |= args.buf_size;
> > +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +				      ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > +				      OCXL_LITTLE_ENDIAN, val);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_execute(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_complete_timeout(ocxlpmem,
> > +					    ADMIN_COMMAND_CONTROLLER_DU
> > MP);
> > +	if (rc < 0) {
> > +		dev_warn(&ocxlpmem->dev, "Controller dump timed
> > out\n");
> > +		goto out;
> > +	}
> > +
> > +	rc = admin_response(ocxlpmem);
> > +	if (rc < 0)
> > +		goto out;
> > +	if (rc != STATUS_SUCCESS) {
> > +		warn_status(ocxlpmem,
> > +			    "Unexpected status from retrieve error
> > log",
> 
> Controller dump
> 

Ok

> > +			    rc);
> > +		goto out;
> > +	}
> > +
> > +	for (i = 0; i < args.buf_size; i += 8) {
> > +		u64 val;
> > +
> > +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +					     ocxlpmem-
> > >admin_command.data_offset + i,
> > +					     OCXL_HOST_ENDIAN, &val);
> 
> Is a controller dump something where we want to do endian swapping?
> 

No, we just have raw binary data that we want to pass through.
OCXL_HOST_ENDIAN does no swapping.

> Any reason we're not doing the usual check of the data identifier, 
> additional data length etc?
> 

I'll add that

> > +		if (rc)
> > +			goto out;
> > +
> > +		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> > +			rc = -EFAULT;
> > +			goto out;
> > +		}
> > +	}
> > +
> > +	if (copy_to_user(uarg, &args, sizeof(args))) {
> > +		rc = -EFAULT;
> > +		goto out;
> > +	}
> > +
> > +	rc = admin_response_handled(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +out:
> > +	mutex_unlock(&ocxlpmem->admin_command.lock);
> > +	return rc;
> > +}
> > +
> > +int request_controller_dump(struct ocxlpmem *ocxlpmem)
> > +{
> > +	int rc;
> > +	u64 busy = 1;
> > +
> > +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_CHIC,
> > +				    OCXL_LITTLE_ENDIAN,
> > +				    GLOBAL_MMIO_CHI_CDA);
> 
> This return code is ignored
> 
> > +
> > +
> > +	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +				    OCXL_LITTLE_ENDIAN,
> > +				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
> > +	if (rc)
> > +		return rc;
> > +
> > +	while (busy) {
> > +		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> > +					     GLOBAL_MMIO_HCI,
> > +					     OCXL_LITTLE_ENDIAN,
> > &busy);
> > +		if (rc)
> > +			return rc;
> > +
> > +		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
> > +		cond_resched();
> > +	}
> > +
> > +	return 0;
> > +}
> 
>

Patch
diff mbox series

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index 2b64504f9129..2cabafe1fc58 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -640,6 +640,124 @@  static int ioctl_error_log(struct ocxlpmem *ocxlpmem,
 	return 0;
 }
 
+static int ioctl_controller_dump_data(struct ocxlpmem *ocxlpmem,
+		struct ioctl_ocxl_pmem_controller_dump_data __user *uarg)
+{
+	struct ioctl_ocxl_pmem_controller_dump_data args;
+	u16 i;
+	u64 val;
+	int rc;
+
+	if (copy_from_user(&args, uarg, sizeof(args)))
+		return -EFAULT;
+
+	if (args.buf_size % 8)
+		return -EINVAL;
+
+	if (args.buf_size > ocxlpmem->admin_command.data_size)
+		return -EINVAL;
+
+	mutex_lock(&ocxlpmem->admin_command.lock);
+
+	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc)
+		goto out;
+
+	val = ((u64)args.offset) << 32;
+	val |= args.buf_size;
+	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
+				      ocxlpmem->admin_command.request_offset + 0x08,
+				      OCXL_LITTLE_ENDIAN, val);
+	if (rc)
+		goto out;
+
+	rc = admin_command_execute(ocxlpmem);
+	if (rc)
+		goto out;
+
+	rc = admin_command_complete_timeout(ocxlpmem,
+					    ADMIN_COMMAND_CONTROLLER_DUMP);
+	if (rc < 0) {
+		dev_warn(&ocxlpmem->dev, "Controller dump timed out\n");
+		goto out;
+	}
+
+	rc = admin_response(ocxlpmem);
+	if (rc < 0)
+		goto out;
+	if (rc != STATUS_SUCCESS) {
+		warn_status(ocxlpmem,
+			    "Unexpected status from retrieve error log",
+			    rc);
+		goto out;
+	}
+
+	for (i = 0; i < args.buf_size; i += 8) {
+		u64 val;
+
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     ocxlpmem->admin_command.data_offset + i,
+					     OCXL_HOST_ENDIAN, &val);
+		if (rc)
+			goto out;
+
+		if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
+			rc = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (copy_to_user(uarg, &args, sizeof(args))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = admin_response_handled(ocxlpmem);
+	if (rc)
+		goto out;
+
+out:
+	mutex_unlock(&ocxlpmem->admin_command.lock);
+	return rc;
+}
+
+int request_controller_dump(struct ocxlpmem *ocxlpmem)
+{
+	int rc;
+	u64 busy = 1;
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_CHIC,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_CHI_CDA);
+
+
+	rc = ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP);
+	if (rc)
+		return rc;
+
+	while (busy) {
+		rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
+					     GLOBAL_MMIO_HCI,
+					     OCXL_LITTLE_ENDIAN, &busy);
+		if (rc)
+			return rc;
+
+		busy &= GLOBAL_MMIO_HCI_CONTROLLER_DUMP;
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static int ioctl_controller_dump_complete(struct ocxlpmem *ocxlpmem)
+{
+	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
+				    OCXL_LITTLE_ENDIAN,
+				    GLOBAL_MMIO_HCI_CONTROLLER_DUMP_COLLECTED);
+}
+
 static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 {
 	struct ocxlpmem *ocxlpmem = file->private_data;
@@ -650,7 +768,21 @@  static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
 		rc = ioctl_error_log(ocxlpmem,
 				     (struct ioctl_ocxl_pmem_error_log __user *)args);
 		break;
+
+	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP:
+		rc = request_controller_dump(ocxlpmem);
+		break;
+
+	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA:
+		rc = ioctl_controller_dump_data(ocxlpmem,
+						(struct ioctl_ocxl_pmem_controller_dump_data __user *)args);
+		break;
+
+	case IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE:
+		rc = ioctl_controller_dump_complete(ocxlpmem);
+		break;
 	}
+
 	return rc;
 }
 
diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
index b10f8ac0c20f..d4d8512d03f7 100644
--- a/include/uapi/nvdimm/ocxl-pmem.h
+++ b/include/uapi/nvdimm/ocxl-pmem.h
@@ -38,9 +38,24 @@  struct ioctl_ocxl_pmem_error_log {
 	__u8 *buf; /* pointer to output buffer */
 };
 
+struct ioctl_ocxl_pmem_controller_dump_data {
+	__u8 *buf; /* pointer to output buffer */
+	__u16 buf_size; /* in/out, buffer size provided/required.
+			 * If required is greater than provided, the buffer
+			 * will be truncated to the amount provided. If its
+			 * less, then only the required bytes will be populated.
+			 * If it is 0, then there is no more dump data available.
+			 */
+	__u32 offset; /* in, Offset within the dump */
+	__u64 reserved[8];
+};
+
 /* ioctl numbers */
 #define OCXL_PMEM_MAGIC 0x5C
 /* SCM devices */
 #define IOCTL_OCXL_PMEM_ERROR_LOG			_IOWR(OCXL_PMEM_MAGIC, 0x01, struct ioctl_ocxl_pmem_error_log)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP			_IO(OCXL_PMEM_MAGIC, 0x02)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_DATA		_IOWR(OCXL_PMEM_MAGIC, 0x03, struct ioctl_ocxl_pmem_controller_dump_data)
+#define IOCTL_OCXL_PMEM_CONTROLLER_DUMP_COMPLETE	_IO(OCXL_PMEM_MAGIC, 0x04)
 
 #endif /* _UAPI_OCXL_SCM_H */