diff mbox series

[v3,16/27] powerpc/powernv/pmem: Register a character device for userspace to interact with

Message ID 20200221032720.33893-17-alastair@au1.ibm.com
State New, archived
Headers show
Series Add support for OpenCAPI Persistent Memory devices | expand

Commit Message

Alastair D'Silva Feb. 21, 2020, 3:27 a.m. UTC
From: Alastair D'Silva <alastair@d-silva.org>

This patch introduces a character device (/dev/ocxl-scmX) which further
patches will use to interact with userspace.

Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
---
 arch/powerpc/platforms/powernv/pmem/ocxl.c    | 116 +++++++++++++++++-
 .../platforms/powernv/pmem/ocxl_internal.h    |   2 +
 2 files changed, 116 insertions(+), 2 deletions(-)

Comments

Andrew Donnellan March 2, 2020, 5:34 a.m. UTC | #1
On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> This patch introduces a character device (/dev/ocxl-scmX) which further
> patches will use to interact with userspace.

As with the comments on other patches in this series, this commit 
message is lacking in explanation. What's the purpose of this device?

> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 116 +++++++++++++++++-
>   .../platforms/powernv/pmem/ocxl_internal.h    |   2 +
>   2 files changed, 116 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index b8bd7e703b19..63109a870d2c 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -10,6 +10,7 @@
>   #include <misc/ocxl.h>
>   #include <linux/delay.h>
>   #include <linux/ndctl.h>
> +#include <linux/fs.h>
>   #include <linux/mm_types.h>
>   #include <linux/memory_hotplug.h>
>   #include "ocxl_internal.h"
> @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
>   
>   	free_minor(ocxlpmem);
>   
> +	if (ocxlpmem->cdev.owner)
> +		cdev_del(&ocxlpmem->cdev);
> +
>   	if (ocxlpmem->metadata_addr)
>   		devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
>   
> @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
>   	return device_register(&ocxlpmem->dev);
>   }
>   
> +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> +{
> +	put_device(&ocxlpmem->dev);
> +}
> +
> +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> +{
> +	return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> +}
> +
> +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> +{
> +	struct ocxlpmem *ocxlpmem;
> +	int minor = MINOR(devno);
> +	/*
> +	 * We don't declare an RCU critical section here, as our AFU
> +	 * is protected by a re0ference counter on the device. By the time the
> +	 * minor number of a device is removed from the idr, the ref count of
> +	 * the device is already at 0, so no user API will access that AFU and
> +	 * this function can't return it.
> +	 */
> +	ocxlpmem = idr_find(&minors_idr, minor);
> +	if (ocxlpmem)
> +		ocxlpmem_get(ocxlpmem);
> +	return ocxlpmem;
> +}
> +
> +static int file_open(struct inode *inode, struct file *file)
> +{
> +	struct ocxlpmem *ocxlpmem;
> +
> +	ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> +	if (!ocxlpmem)
> +		return -ENODEV;
> +
> +	file->private_data = ocxlpmem;
> +	return 0;
> +}
> +
> +static int file_release(struct inode *inode, struct file *file)
> +{
> +	struct ocxlpmem *ocxlpmem = file->private_data;
> +
> +	ocxlpmem_put(ocxlpmem);
> +	return 0;
> +}
> +
> +static const struct file_operations fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= file_open,
> +	.release	= file_release,
> +};
> +
> +/**
> + * create_cdev() - Create the chardev in /dev for the device
> + * @ocxlpmem: the SCM metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int create_cdev(struct ocxlpmem *ocxlpmem)
> +{
> +	cdev_init(&ocxlpmem->cdev, &fops);
> +	return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> +}
> +
>   /**
>    * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
>    * @pdev: the PCI device information struct
> @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   		goto err;
>   	}
>   
> +	if (create_cdev(ocxlpmem)) {
> +		dev_err(&pdev->dev, "Could not create character device\n");
> +		goto err;
> +	}
> +
>   	elapsed = 0;
>   	timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
>   	while (!is_usable(ocxlpmem, false)) {
> @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
>   	.shutdown = ocxlpmem_remove,
>   };
>   
> +static int file_init(void)
> +{
> +	int rc;
> +
> +	mutex_init(&minors_idr_lock);
> +	idr_init(&minors_idr);
> +
> +	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");

If the driver is going to be called "ocxlpmem" can we standardise on 
that without the extra hyphen?

> +	if (rc) {
> +		idr_destroy(&minors_idr);
> +		pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
> +		return rc;
> +	}
> +
> +	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> +	if (IS_ERR(ocxlpmem_class)) {
> +		idr_destroy(&minors_idr);
> +		pr_err("Unable to create ocxl-pmem class\n");
> +		unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> +		return PTR_ERR(ocxlpmem_class);
> +	}
> +
> +	return 0;
> +}
> +
> +static void file_exit(void)
> +{
> +	class_destroy(ocxlpmem_class);
> +	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> +	idr_destroy(&minors_idr);
> +}
> +
>   static int __init ocxlpmem_init(void)
>   {
> -	int rc = 0;
> +	int rc;
>   
> -	rc = pci_register_driver(&pci_driver);
> +	rc = file_init();
>   	if (rc)
>   		return rc;
>   
> +	rc = pci_register_driver(&pci_driver);
> +	if (rc) {
> +		file_exit();
> +		return rc;
> +	}
> +
>   	return 0;
>   }
>   
>   static void ocxlpmem_exit(void)
>   {
>   	pci_unregister_driver(&pci_driver);
> +	file_exit();
>   }
>   
>   module_init(ocxlpmem_init);
Alastair D'Silva March 2, 2020, 6:05 a.m. UTC | #2
On Mon, 2020-03-02 at 16:34 +1100, Andrew Donnellan wrote:
> On 21/2/20 2:27 pm, Alastair D'Silva wrote:
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > This patch introduces a character device (/dev/ocxl-scmX) which
> > further
> > patches will use to interact with userspace.
> 
> As with the comments on other patches in this series, this commit 
> message is lacking in explanation. What's the purpose of this device?
> 

I'll reword this for v4.

> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 116
> > +++++++++++++++++-
> >   .../platforms/powernv/pmem/ocxl_internal.h    |   2 +
> >   2 files changed, 116 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index b8bd7e703b19..63109a870d2c 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> >   #include <misc/ocxl.h>
> >   #include <linux/delay.h>
> >   #include <linux/ndctl.h>
> > +#include <linux/fs.h>
> >   #include <linux/mm_types.h>
> >   #include <linux/memory_hotplug.h>
> >   #include "ocxl_internal.h"
> > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> >   
> >   	free_minor(ocxlpmem);
> >   
> > +	if (ocxlpmem->cdev.owner)
> > +		cdev_del(&ocxlpmem->cdev);
> > +
> >   	if (ocxlpmem->metadata_addr)
> >   		devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
> >   
> > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem
> > *ocxlpmem)
> >   	return device_register(&ocxlpmem->dev);
> >   }
> >   
> > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> > +{
> > +	put_device(&ocxlpmem->dev);
> > +}
> > +
> > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> > +{
> > +	return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> > +}
> > +
> > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> > +{
> > +	struct ocxlpmem *ocxlpmem;
> > +	int minor = MINOR(devno);
> > +	/*
> > +	 * We don't declare an RCU critical section here, as our AFU
> > +	 * is protected by a re0ference counter on the device. By the
> > time the
> > +	 * minor number of a device is removed from the idr, the ref
> > count of
> > +	 * the device is already at 0, so no user API will access that
> > AFU and
> > +	 * this function can't return it.
> > +	 */
> > +	ocxlpmem = idr_find(&minors_idr, minor);
> > +	if (ocxlpmem)
> > +		ocxlpmem_get(ocxlpmem);
> > +	return ocxlpmem;
> > +}
> > +
> > +static int file_open(struct inode *inode, struct file *file)
> > +{
> > +	struct ocxlpmem *ocxlpmem;
> > +
> > +	ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> > +	if (!ocxlpmem)
> > +		return -ENODEV;
> > +
> > +	file->private_data = ocxlpmem;
> > +	return 0;
> > +}
> > +
> > +static int file_release(struct inode *inode, struct file *file)
> > +{
> > +	struct ocxlpmem *ocxlpmem = file->private_data;
> > +
> > +	ocxlpmem_put(ocxlpmem);
> > +	return 0;
> > +}
> > +
> > +static const struct file_operations fops = {
> > +	.owner		= THIS_MODULE,
> > +	.open		= file_open,
> > +	.release	= file_release,
> > +};
> > +
> > +/**
> > + * create_cdev() - Create the chardev in /dev for the device
> > + * @ocxlpmem: the SCM metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int create_cdev(struct ocxlpmem *ocxlpmem)
> > +{
> > +	cdev_init(&ocxlpmem->cdev, &fops);
> > +	return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> > +}
> > +
> >   /**
> >    * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> >    * @pdev: the PCI device information struct
> > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> >   		goto err;
> >   	}
> >   
> > +	if (create_cdev(ocxlpmem)) {
> > +		dev_err(&pdev->dev, "Could not create character
> > device\n");
> > +		goto err;
> > +	}
> > +
> >   	elapsed = 0;
> >   	timeout = ocxlpmem->readiness_timeout + ocxlpmem-
> > >memory_available_timeout;
> >   	while (!is_usable(ocxlpmem, false)) {
> > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> >   	.shutdown = ocxlpmem_remove,
> >   };
> >   
> > +static int file_init(void)
> > +{
> > +	int rc;
> > +
> > +	mutex_init(&minors_idr_lock);
> > +	idr_init(&minors_idr);
> > +
> > +	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-
> > pmem");
> 
> If the driver is going to be called "ocxlpmem" can we standardise on 
> that without the extra hyphen?

Ok

> > +	if (rc) {
> > +		idr_destroy(&minors_idr);
> > +		pr_err("Unable to allocate OpenCAPI persistent memory
> > major number: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> > +	if (IS_ERR(ocxlpmem_class)) {
> > +		idr_destroy(&minors_idr);
> > +		pr_err("Unable to create ocxl-pmem class\n");
> > +		unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > +		return PTR_ERR(ocxlpmem_class);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void file_exit(void)
> > +{
> > +	class_destroy(ocxlpmem_class);
> > +	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > +	idr_destroy(&minors_idr);
> > +}
> > +
> >   static int __init ocxlpmem_init(void)
> >   {
> > -	int rc = 0;
> > +	int rc;
> >   
> > -	rc = pci_register_driver(&pci_driver);
> > +	rc = file_init();
> >   	if (rc)
> >   		return rc;
> >   
> > +	rc = pci_register_driver(&pci_driver);
> > +	if (rc) {
> > +		file_exit();
> > +		return rc;
> > +	}
> > +
> >   	return 0;
> >   }
> >   
> >   static void ocxlpmem_exit(void)
> >   {
> >   	pci_unregister_driver(&pci_driver);
> > +	file_exit();
> >   }
> >   
> >   module_init(ocxlpmem_init);
Frederic Barrat March 3, 2020, 9:28 a.m. UTC | #3
Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair@d-silva.org>
> 
> This patch introduces a character device (/dev/ocxl-scmX) which further
> patches will use to interact with userspace.
> 
> Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> ---
>   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 116 +++++++++++++++++-
>   .../platforms/powernv/pmem/ocxl_internal.h    |   2 +
>   2 files changed, 116 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index b8bd7e703b19..63109a870d2c 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -10,6 +10,7 @@
>   #include <misc/ocxl.h>
>   #include <linux/delay.h>
>   #include <linux/ndctl.h>
> +#include <linux/fs.h>
>   #include <linux/mm_types.h>
>   #include <linux/memory_hotplug.h>
>   #include "ocxl_internal.h"
> @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
>   
>   	free_minor(ocxlpmem);
>   
> +	if (ocxlpmem->cdev.owner)
> +		cdev_del(&ocxlpmem->cdev);
> +
>   	if (ocxlpmem->metadata_addr)
>   		devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
>   
> @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
>   	return device_register(&ocxlpmem->dev);
>   }
>   
> +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> +{
> +	put_device(&ocxlpmem->dev);
> +}
> +
> +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> +{
> +	return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> +}
> +
> +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> +{
> +	struct ocxlpmem *ocxlpmem;
> +	int minor = MINOR(devno);
> +	/*
> +	 * We don't declare an RCU critical section here, as our AFU
> +	 * is protected by a reference counter on the device. By the time the
> +	 * minor number of a device is removed from the idr, the ref count of
> +	 * the device is already at 0, so no user API will access that AFU and
> +	 * this function can't return it.
> +	 */


I fixed something related in the ocxl driver (which had enough changes 
with the introduction of the "info" device to make a similar comment 
become wrong). See commit a58d37bce0d21. The issue is handling a 
simultaneous open() and removal of the device through /sysfs as best we can.

We are on a file open path and it's not like we're going to have a 
thousand clients, so performance is not that critical. We can take the 
mutex before searching in the IDR and release it after we increment the 
reference count on the device.
But that's not enough: we could still find the device in the IDR while 
it is being removed in free_ocxlpmem(). I believe the only safe way to 
address it is by removing the user-facing APIs (the char device) before 
calling device_unregister(). So that it's not possible to find the 
device in file_open() if it's in the middle of being removed.

   Fred


> +	ocxlpmem = idr_find(&minors_idr, minor);
> +	if (ocxlpmem)
> +		ocxlpmem_get(ocxlpmem);
> +	return ocxlpmem;
> +}
> +
> +static int file_open(struct inode *inode, struct file *file)
> +{
> +	struct ocxlpmem *ocxlpmem;
> +
> +	ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> +	if (!ocxlpmem)
> +		return -ENODEV;
> +
> +	file->private_data = ocxlpmem;
> +	return 0;
> +}
> +
> +static int file_release(struct inode *inode, struct file *file)
> +{
> +	struct ocxlpmem *ocxlpmem = file->private_data;
> +
> +	ocxlpmem_put(ocxlpmem);
> +	return 0;
> +}
> +
> +static const struct file_operations fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= file_open,
> +	.release	= file_release,
> +};
> +
> +/**
> + * create_cdev() - Create the chardev in /dev for the device
> + * @ocxlpmem: the SCM metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int create_cdev(struct ocxlpmem *ocxlpmem)
> +{
> +	cdev_init(&ocxlpmem->cdev, &fops);
> +	return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> +}
> +
>   /**
>    * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
>    * @pdev: the PCI device information struct
> @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   		goto err;
>   	}
>   
> +	if (create_cdev(ocxlpmem)) {
> +		dev_err(&pdev->dev, "Could not create character device\n");
> +		goto err;
> +	}


As already mentioned in a previous patch, we branch to the err label so 
rc needs to be set to a valid error.



> +
>   	elapsed = 0;
>   	timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
>   	while (!is_usable(ocxlpmem, false)) {
> @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
>   	.shutdown = ocxlpmem_remove,
>   };
>   
> +static int file_init(void)
> +{
> +	int rc;
> +
> +	mutex_init(&minors_idr_lock);
> +	idr_init(&minors_idr);
> +
> +	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");
> +	if (rc) {
> +		idr_destroy(&minors_idr);
> +		pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
> +		return rc;
> +	}
> +
> +	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> +	if (IS_ERR(ocxlpmem_class)) {
> +		idr_destroy(&minors_idr);
> +		pr_err("Unable to create ocxl-pmem class\n");
> +		unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> +		return PTR_ERR(ocxlpmem_class);
> +	}
> +
> +	return 0;
> +}
> +
> +static void file_exit(void)
> +{
> +	class_destroy(ocxlpmem_class);
> +	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> +	idr_destroy(&minors_idr);
> +}
> +
>   static int __init ocxlpmem_init(void)
>   {
> -	int rc = 0;
> +	int rc;
>   
> -	rc = pci_register_driver(&pci_driver);
> +	rc = file_init();
>   	if (rc)
>   		return rc;
>   
> +	rc = pci_register_driver(&pci_driver);
> +	if (rc) {
> +		file_exit();
> +		return rc;
> +	}
> +
>   	return 0;
>   }
>   
>   static void ocxlpmem_exit(void)
>   {
>   	pci_unregister_driver(&pci_driver);
> +	file_exit();
>   }
>   
>   module_init(ocxlpmem_init);
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> index 28e2020f6355..d2d81fec7bb1 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> @@ -2,6 +2,7 @@
>   // Copyright 2019 IBM Corp.
>   
>   #include <linux/pci.h>
> +#include <linux/cdev.h>
>   #include <misc/ocxl.h>
>   #include <linux/libnvdimm.h>
>   #include <linux/mm.h>
> @@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
>   struct ocxlpmem {
>   	struct device dev;
>   	struct pci_dev *pdev;
> +	struct cdev cdev;
>   	struct ocxl_fn *ocxl_fn;
>   	struct nd_interleave_set nd_set;
>   	struct nvdimm_bus_descriptor bus_desc;
>
Alastair D'Silva March 5, 2020, 3:38 a.m. UTC | #4
On Tue, 2020-03-03 at 10:28 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair@d-silva.org>
> > 
> > This patch introduces a character device (/dev/ocxl-scmX) which
> > further
> > patches will use to interact with userspace.
> > 
> > Signed-off-by: Alastair D'Silva <alastair@d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c    | 116
> > +++++++++++++++++-
> >   .../platforms/powernv/pmem/ocxl_internal.h    |   2 +
> >   2 files changed, 116 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index b8bd7e703b19..63109a870d2c 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -10,6 +10,7 @@
> >   #include <misc/ocxl.h>
> >   #include <linux/delay.h>
> >   #include <linux/ndctl.h>
> > +#include <linux/fs.h>
> >   #include <linux/mm_types.h>
> >   #include <linux/memory_hotplug.h>
> >   #include "ocxl_internal.h"
> > @@ -339,6 +340,9 @@ static void free_ocxlpmem(struct ocxlpmem
> > *ocxlpmem)
> >   
> >   	free_minor(ocxlpmem);
> >   
> > +	if (ocxlpmem->cdev.owner)
> > +		cdev_del(&ocxlpmem->cdev);
> > +
> >   	if (ocxlpmem->metadata_addr)
> >   		devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
> >   
> > @@ -396,6 +400,70 @@ static int ocxlpmem_register(struct ocxlpmem
> > *ocxlpmem)
> >   	return device_register(&ocxlpmem->dev);
> >   }
> >   
> > +static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
> > +{
> > +	put_device(&ocxlpmem->dev);
> > +}
> > +
> > +static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
> > +{
> > +	return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
> > +}
> > +
> > +static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
> > +{
> > +	struct ocxlpmem *ocxlpmem;
> > +	int minor = MINOR(devno);
> > +	/*
> > +	 * We don't declare an RCU critical section here, as our AFU
> > +	 * is protected by a reference counter on the device. By the
> > time the
> > +	 * minor number of a device is removed from the idr, the ref
> > count of
> > +	 * the device is already at 0, so no user API will access that
> > AFU and
> > +	 * this function can't return it.
> > +	 */
> 
> I fixed something related in the ocxl driver (which had enough
> changes 
> with the introduction of the "info" device to make a similar comment 
> become wrong). See commit a58d37bce0d21. The issue is handling a 
> simultaneous open() and removal of the device through /sysfs as best
> we can.
> 
> We are on a file open path and it's not like we're going to have a 
> thousand clients, so performance is not that critical. We can take
> the 
> mutex before searching in the IDR and release it after we increment
> the 
> reference count on the device.
> But that's not enough: we could still find the device in the IDR
> while 
> it is being removed in free_ocxlpmem(). I believe the only safe way
> to 
> address it is by removing the user-facing APIs (the char device)
> before 
> calling device_unregister(). So that it's not possible to find the 
> device in file_open() if it's in the middle of being removed.
> 
>    Fred
> 
> 

Ok, I'll replicate that patch & follow your advice.

> > +	ocxlpmem = idr_find(&minors_idr, minor);
> > +	if (ocxlpmem)
> > +		ocxlpmem_get(ocxlpmem);
> > +	return ocxlpmem;
> > +}
> > +
> > +static int file_open(struct inode *inode, struct file *file)
> > +{
> > +	struct ocxlpmem *ocxlpmem;
> > +
> > +	ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
> > +	if (!ocxlpmem)
> > +		return -ENODEV;
> > +
> > +	file->private_data = ocxlpmem;
> > +	return 0;
> > +}
> > +
> > +static int file_release(struct inode *inode, struct file *file)
> > +{
> > +	struct ocxlpmem *ocxlpmem = file->private_data;
> > +
> > +	ocxlpmem_put(ocxlpmem);
> > +	return 0;
> > +}
> > +
> > +static const struct file_operations fops = {
> > +	.owner		= THIS_MODULE,
> > +	.open		= file_open,
> > +	.release	= file_release,
> > +};
> > +
> > +/**
> > + * create_cdev() - Create the chardev in /dev for the device
> > + * @ocxlpmem: the SCM metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int create_cdev(struct ocxlpmem *ocxlpmem)
> > +{
> > +	cdev_init(&ocxlpmem->cdev, &fops);
> > +	return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
> > +}
> > +
> >   /**
> >    * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
> >    * @pdev: the PCI device information struct
> > @@ -572,6 +640,11 @@ static int probe(struct pci_dev *pdev, const
> > struct pci_device_id *ent)
> >   		goto err;
> >   	}
> >   
> > +	if (create_cdev(ocxlpmem)) {
> > +		dev_err(&pdev->dev, "Could not create character
> > device\n");
> > +		goto err;
> > +	}
> 
> As already mentioned in a previous patch, we branch to the err label
> so 
> rc needs to be set to a valid error.
> 

Ok

> 
> 
> > +
> >   	elapsed = 0;
> >   	timeout = ocxlpmem->readiness_timeout + ocxlpmem-
> > >memory_available_timeout;
> >   	while (!is_usable(ocxlpmem, false)) {
> > @@ -613,20 +686,59 @@ static struct pci_driver pci_driver = {
> >   	.shutdown = ocxlpmem_remove,
> >   };
> >   
> > +static int file_init(void)
> > +{
> > +	int rc;
> > +
> > +	mutex_init(&minors_idr_lock);
> > +	idr_init(&minors_idr);
> > +
> > +	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-
> > pmem");
> > +	if (rc) {
> > +		idr_destroy(&minors_idr);
> > +		pr_err("Unable to allocate OpenCAPI persistent memory
> > major number: %d\n", rc);
> > +		return rc;
> > +	}
> > +
> > +	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
> > +	if (IS_ERR(ocxlpmem_class)) {
> > +		idr_destroy(&minors_idr);
> > +		pr_err("Unable to create ocxl-pmem class\n");
> > +		unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > +		return PTR_ERR(ocxlpmem_class);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void file_exit(void)
> > +{
> > +	class_destroy(ocxlpmem_class);
> > +	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
> > +	idr_destroy(&minors_idr);
> > +}
> > +
> >   static int __init ocxlpmem_init(void)
> >   {
> > -	int rc = 0;
> > +	int rc;
> >   
> > -	rc = pci_register_driver(&pci_driver);
> > +	rc = file_init();
> >   	if (rc)
> >   		return rc;
> >   
> > +	rc = pci_register_driver(&pci_driver);
> > +	if (rc) {
> > +		file_exit();
> > +		return rc;
> > +	}
> > +
> >   	return 0;
> >   }
> >   
> >   static void ocxlpmem_exit(void)
> >   {
> >   	pci_unregister_driver(&pci_driver);
> > +	file_exit();
> >   }
> >   
> >   module_init(ocxlpmem_init);
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > index 28e2020f6355..d2d81fec7bb1 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
> > @@ -2,6 +2,7 @@
> >   // Copyright 2019 IBM Corp.
> >   
> >   #include <linux/pci.h>
> > +#include <linux/cdev.h>
> >   #include <misc/ocxl.h>
> >   #include <linux/libnvdimm.h>
> >   #include <linux/mm.h>
> > @@ -99,6 +100,7 @@ struct ocxlpmem_function0 {
> >   struct ocxlpmem {
> >   	struct device dev;
> >   	struct pci_dev *pdev;
> > +	struct cdev cdev;
> >   	struct ocxl_fn *ocxl_fn;
> >   	struct nd_interleave_set nd_set;
> >   	struct nvdimm_bus_descriptor bus_desc;
> >
diff mbox series

Patch

diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
index b8bd7e703b19..63109a870d2c 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
@@ -10,6 +10,7 @@ 
 #include <misc/ocxl.h>
 #include <linux/delay.h>
 #include <linux/ndctl.h>
+#include <linux/fs.h>
 #include <linux/mm_types.h>
 #include <linux/memory_hotplug.h>
 #include "ocxl_internal.h"
@@ -339,6 +340,9 @@  static void free_ocxlpmem(struct ocxlpmem *ocxlpmem)
 
 	free_minor(ocxlpmem);
 
+	if (ocxlpmem->cdev.owner)
+		cdev_del(&ocxlpmem->cdev);
+
 	if (ocxlpmem->metadata_addr)
 		devm_memunmap(&ocxlpmem->dev, ocxlpmem->metadata_addr);
 
@@ -396,6 +400,70 @@  static int ocxlpmem_register(struct ocxlpmem *ocxlpmem)
 	return device_register(&ocxlpmem->dev);
 }
 
+static void ocxlpmem_put(struct ocxlpmem *ocxlpmem)
+{
+	put_device(&ocxlpmem->dev);
+}
+
+static struct ocxlpmem *ocxlpmem_get(struct ocxlpmem *ocxlpmem)
+{
+	return (get_device(&ocxlpmem->dev) == NULL) ? NULL : ocxlpmem;
+}
+
+static struct ocxlpmem *find_and_get_ocxlpmem(dev_t devno)
+{
+	struct ocxlpmem *ocxlpmem;
+	int minor = MINOR(devno);
+	/*
+	 * We don't declare an RCU critical section here, as our AFU
+	 * is protected by a reference counter on the device. By the time the
+	 * minor number of a device is removed from the idr, the ref count of
+	 * the device is already at 0, so no user API will access that AFU and
+	 * this function can't return it.
+	 */
+	ocxlpmem = idr_find(&minors_idr, minor);
+	if (ocxlpmem)
+		ocxlpmem_get(ocxlpmem);
+	return ocxlpmem;
+}
+
+static int file_open(struct inode *inode, struct file *file)
+{
+	struct ocxlpmem *ocxlpmem;
+
+	ocxlpmem = find_and_get_ocxlpmem(inode->i_rdev);
+	if (!ocxlpmem)
+		return -ENODEV;
+
+	file->private_data = ocxlpmem;
+	return 0;
+}
+
+static int file_release(struct inode *inode, struct file *file)
+{
+	struct ocxlpmem *ocxlpmem = file->private_data;
+
+	ocxlpmem_put(ocxlpmem);
+	return 0;
+}
+
+static const struct file_operations fops = {
+	.owner		= THIS_MODULE,
+	.open		= file_open,
+	.release	= file_release,
+};
+
+/**
+ * create_cdev() - Create the chardev in /dev for the device
+ * @ocxlpmem: the SCM metadata
+ * Return: 0 on success, negative on failure
+ */
+static int create_cdev(struct ocxlpmem *ocxlpmem)
+{
+	cdev_init(&ocxlpmem->cdev, &fops);
+	return cdev_add(&ocxlpmem->cdev, ocxlpmem->dev.devt, 1);
+}
+
 /**
  * ocxlpmem_remove() - Free an OpenCAPI persistent memory device
  * @pdev: the PCI device information struct
@@ -572,6 +640,11 @@  static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err;
 	}
 
+	if (create_cdev(ocxlpmem)) {
+		dev_err(&pdev->dev, "Could not create character device\n");
+		goto err;
+	}
+
 	elapsed = 0;
 	timeout = ocxlpmem->readiness_timeout + ocxlpmem->memory_available_timeout;
 	while (!is_usable(ocxlpmem, false)) {
@@ -613,20 +686,59 @@  static struct pci_driver pci_driver = {
 	.shutdown = ocxlpmem_remove,
 };
 
+static int file_init(void)
+{
+	int rc;
+
+	mutex_init(&minors_idr_lock);
+	idr_init(&minors_idr);
+
+	rc = alloc_chrdev_region(&ocxlpmem_dev, 0, NUM_MINORS, "ocxl-pmem");
+	if (rc) {
+		idr_destroy(&minors_idr);
+		pr_err("Unable to allocate OpenCAPI persistent memory major number: %d\n", rc);
+		return rc;
+	}
+
+	ocxlpmem_class = class_create(THIS_MODULE, "ocxl-pmem");
+	if (IS_ERR(ocxlpmem_class)) {
+		idr_destroy(&minors_idr);
+		pr_err("Unable to create ocxl-pmem class\n");
+		unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+		return PTR_ERR(ocxlpmem_class);
+	}
+
+	return 0;
+}
+
+static void file_exit(void)
+{
+	class_destroy(ocxlpmem_class);
+	unregister_chrdev_region(ocxlpmem_dev, NUM_MINORS);
+	idr_destroy(&minors_idr);
+}
+
 static int __init ocxlpmem_init(void)
 {
-	int rc = 0;
+	int rc;
 
-	rc = pci_register_driver(&pci_driver);
+	rc = file_init();
 	if (rc)
 		return rc;
 
+	rc = pci_register_driver(&pci_driver);
+	if (rc) {
+		file_exit();
+		return rc;
+	}
+
 	return 0;
 }
 
 static void ocxlpmem_exit(void)
 {
 	pci_unregister_driver(&pci_driver);
+	file_exit();
 }
 
 module_init(ocxlpmem_init);
diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
index 28e2020f6355..d2d81fec7bb1 100644
--- a/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
+++ b/arch/powerpc/platforms/powernv/pmem/ocxl_internal.h
@@ -2,6 +2,7 @@ 
 // Copyright 2019 IBM Corp.
 
 #include <linux/pci.h>
+#include <linux/cdev.h>
 #include <misc/ocxl.h>
 #include <linux/libnvdimm.h>
 #include <linux/mm.h>
@@ -99,6 +100,7 @@  struct ocxlpmem_function0 {
 struct ocxlpmem {
 	struct device dev;
 	struct pci_dev *pdev;
+	struct cdev cdev;
 	struct ocxl_fn *ocxl_fn;
 	struct nd_interleave_set nd_set;
 	struct nvdimm_bus_descriptor bus_desc;