diff mbox series

[v3,20/28] cxl/mbox: Add exclusive kernel command support

Message ID 162982123298.1124374.22718002900700392.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State Superseded
Headers show
Series cxl_test: Enable CXL Topology and UAPI regression tests | expand

Commit Message

Dan Williams Aug. 24, 2021, 4:07 p.m. UTC
The CXL_PMEM driver expects exclusive control of the label storage area
space. Similar to the LIBNVDIMM expectation that the label storage area
is only writable from userspace when the corresponding memory device is
not active in any region, the expectation is the native CXL_PCI UAPI
path is disabled while the cxl_nvdimm for a given cxl_memdev device is
active in LIBNVDIMM.

Add the ability to toggle the availability of a given command for the
UAPI path. Use that new capability to shutdown changes to partitions and
the label storage area while the cxl_nvdimm device is actively proxying
commands for LIBNVDIMM.

Acked-by: Ben Widawsky <ben.widawsky@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/mbox.c   |    5 +++++
 drivers/cxl/core/memdev.c |   31 +++++++++++++++++++++++++++++++
 drivers/cxl/cxlmem.h      |    4 ++++
 drivers/cxl/pmem.c        |   35 ++++++++++++++++++++++++++++-------
 4 files changed, 68 insertions(+), 7 deletions(-)

Comments

Jonathan Cameron Sept. 2, 2021, 6:09 p.m. UTC | #1
On Tue, 24 Aug 2021 09:07:13 -0700
Dan Williams <dan.j.williams@intel.com> wrote:

> The CXL_PMEM driver expects exclusive control of the label storage area
> space. Similar to the LIBNVDIMM expectation that the label storage area
> is only writable from userspace when the corresponding memory device is
> not active in any region, the expectation is the native CXL_PCI UAPI
> path is disabled while the cxl_nvdimm for a given cxl_memdev device is
> active in LIBNVDIMM.
> 
> Add the ability to toggle the availability of a given command for the
> UAPI path. Use that new capability to shutdown changes to partitions and
> the label storage area while the cxl_nvdimm device is actively proxying
> commands for LIBNVDIMM.
> 
> Acked-by: Ben Widawsky <ben.widawsky@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/mbox.c   |    5 +++++
>  drivers/cxl/core/memdev.c |   31 +++++++++++++++++++++++++++++++
>  drivers/cxl/cxlmem.h      |    4 ++++
>  drivers/cxl/pmem.c        |   35 ++++++++++++++++++++++++++++-------
>  4 files changed, 68 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 73107b302224..6a5c4f3679ba 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -230,6 +230,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
>   *  * %-EINVAL	- Reserved fields or invalid values were used.
>   *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
>   *  * %-EPERM	- Attempted to use a protected command.
> + *  * %-EBUSY	- Kernel has claimed exclusive access to this opcode
>   *
>   * The result of this command is a fully validated command in @out_cmd that is
>   * safe to send to the hardware.
> @@ -305,6 +306,10 @@ static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
>  	if (!test_bit(info->id, cxlm->enabled_cmds))
>  		return -ENOTTY;
>  
> +	/* Check that the command is not claimed for exclusive kernel use */
> +	if (test_bit(info->id, cxlm->exclusive_cmds))
> +		return -EBUSY;
> +
>  	/* Check the input buffer is the expected size */
>  	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
>  		return -ENOMEM;


>  #endif /* __CXL_MEM_H__ */
> diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> index 9652c3ee41e7..469b984176a2 100644
> --- a/drivers/cxl/pmem.c
> +++ b/drivers/cxl/pmem.c
> @@ -16,9 +16,21 @@
>   */
>  static struct workqueue_struct *cxl_pmem_wq;
>  
> -static void unregister_nvdimm(void *nvdimm)
> +static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
> +
> +static void unregister_nvdimm(void *_cxl_nvd)
>  {
> -	nvdimm_delete(nvdimm);
> +	struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
> +	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
> +	struct cxl_mem *cxlm = cxlmd->cxlm;
> +	struct device *dev = &cxl_nvd->dev;
> +	struct nvdimm *nvdimm;
> +
> +	nvdimm = dev_get_drvdata(dev);
> +	if (nvdimm)
> +		nvdimm_delete(nvdimm);
> +
> +	clear_exclusive_cxl_commands(cxlm, exclusive_cmds);
>  }
>  
>  static int match_nvdimm_bridge(struct device *dev, const void *data)
> @@ -39,9 +51,11 @@ static struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(void)
>  static int cxl_nvdimm_probe(struct device *dev)
>  {
>  	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
> +	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
> +	struct cxl_mem *cxlm = cxlmd->cxlm;
>  	struct cxl_nvdimm_bridge *cxl_nvb;
> +	struct nvdimm *nvdimm = NULL;
>  	unsigned long flags = 0;
> -	struct nvdimm *nvdimm;
>  	int rc = -ENXIO;
>  
>  	cxl_nvb = cxl_find_nvdimm_bridge();
> @@ -52,17 +66,20 @@ static int cxl_nvdimm_probe(struct device *dev)
>  	if (!cxl_nvb->nvdimm_bus)
>  		goto out;
>  
> +	set_exclusive_cxl_commands(cxlm, exclusive_cmds);
> +
>  	set_bit(NDD_LABELING, &flags);
>  	nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, 0, 0,
>  			       NULL);
> -	if (!nvdimm)
> -		goto out;
> -
> -	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
> +	dev_set_drvdata(dev, nvdimm);
> +	rc = devm_add_action_or_reset(dev, unregister_nvdimm, cxl_nvd);

I think this ends up less readable than explicit devm handling of each part
rather than combining them.


	set_exclusive...()
	rc = devm_add_action_or_rset(dev, unset_exclusive, cxlm);
	if (rc)
		goto out;

	nvidimm = nvdim_create()
	if (!nvdimm) //return value looks dubious in old code but I've not checked it properly.
		goto out;

	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
	if (rc)
		goto out;
	dev_set_drvdata(dev, nvdimm);

and two simpler unwinding functions doing just one thing each.

>  out:
>  	device_unlock(&cxl_nvb->dev);
>  	put_device(&cxl_nvb->dev);
>  
> +	if (!nvdimm && rc == 0)
> +		rc = -ENOMEM;
> +
>  	return rc;
>  }
>  
> @@ -194,6 +211,10 @@ static __init int cxl_pmem_init(void)
>  {
>  	int rc;
>  
> +	set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds);
> +	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
> +	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
> +
>  	cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
>  	if (!cxl_pmem_wq)
>  		return -ENXIO;
>
Dan Williams Sept. 3, 2021, 8:47 p.m. UTC | #2
On Thu, Sep 2, 2021 at 11:09 AM Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Tue, 24 Aug 2021 09:07:13 -0700
> Dan Williams <dan.j.williams@intel.com> wrote:
>
> > The CXL_PMEM driver expects exclusive control of the label storage area
> > space. Similar to the LIBNVDIMM expectation that the label storage area
> > is only writable from userspace when the corresponding memory device is
> > not active in any region, the expectation is the native CXL_PCI UAPI
> > path is disabled while the cxl_nvdimm for a given cxl_memdev device is
> > active in LIBNVDIMM.
> >
> > Add the ability to toggle the availability of a given command for the
> > UAPI path. Use that new capability to shutdown changes to partitions and
> > the label storage area while the cxl_nvdimm device is actively proxying
> > commands for LIBNVDIMM.
> >
> > Acked-by: Ben Widawsky <ben.widawsky@intel.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  drivers/cxl/core/mbox.c   |    5 +++++
> >  drivers/cxl/core/memdev.c |   31 +++++++++++++++++++++++++++++++
> >  drivers/cxl/cxlmem.h      |    4 ++++
> >  drivers/cxl/pmem.c        |   35 ++++++++++++++++++++++++++++-------
> >  4 files changed, 68 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 73107b302224..6a5c4f3679ba 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -230,6 +230,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
> >   *  * %-EINVAL       - Reserved fields or invalid values were used.
> >   *  * %-ENOMEM       - Input or output buffer wasn't sized properly.
> >   *  * %-EPERM        - Attempted to use a protected command.
> > + *  * %-EBUSY        - Kernel has claimed exclusive access to this opcode
> >   *
> >   * The result of this command is a fully validated command in @out_cmd that is
> >   * safe to send to the hardware.
> > @@ -305,6 +306,10 @@ static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
> >       if (!test_bit(info->id, cxlm->enabled_cmds))
> >               return -ENOTTY;
> >
> > +     /* Check that the command is not claimed for exclusive kernel use */
> > +     if (test_bit(info->id, cxlm->exclusive_cmds))
> > +             return -EBUSY;
> > +
> >       /* Check the input buffer is the expected size */
> >       if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
> >               return -ENOMEM;
>
>
> >  #endif /* __CXL_MEM_H__ */
> > diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
> > index 9652c3ee41e7..469b984176a2 100644
> > --- a/drivers/cxl/pmem.c
> > +++ b/drivers/cxl/pmem.c
> > @@ -16,9 +16,21 @@
> >   */
> >  static struct workqueue_struct *cxl_pmem_wq;
> >
> > -static void unregister_nvdimm(void *nvdimm)
> > +static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
> > +
> > +static void unregister_nvdimm(void *_cxl_nvd)
> >  {
> > -     nvdimm_delete(nvdimm);
> > +     struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
> > +     struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
> > +     struct cxl_mem *cxlm = cxlmd->cxlm;
> > +     struct device *dev = &cxl_nvd->dev;
> > +     struct nvdimm *nvdimm;
> > +
> > +     nvdimm = dev_get_drvdata(dev);
> > +     if (nvdimm)
> > +             nvdimm_delete(nvdimm);
> > +
> > +     clear_exclusive_cxl_commands(cxlm, exclusive_cmds);
> >  }
> >
> >  static int match_nvdimm_bridge(struct device *dev, const void *data)
> > @@ -39,9 +51,11 @@ static struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(void)
> >  static int cxl_nvdimm_probe(struct device *dev)
> >  {
> >       struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
> > +     struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
> > +     struct cxl_mem *cxlm = cxlmd->cxlm;
> >       struct cxl_nvdimm_bridge *cxl_nvb;
> > +     struct nvdimm *nvdimm = NULL;
> >       unsigned long flags = 0;
> > -     struct nvdimm *nvdimm;
> >       int rc = -ENXIO;
> >
> >       cxl_nvb = cxl_find_nvdimm_bridge();
> > @@ -52,17 +66,20 @@ static int cxl_nvdimm_probe(struct device *dev)
> >       if (!cxl_nvb->nvdimm_bus)
> >               goto out;
> >
> > +     set_exclusive_cxl_commands(cxlm, exclusive_cmds);
> > +
> >       set_bit(NDD_LABELING, &flags);
> >       nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, 0, 0,
> >                              NULL);
> > -     if (!nvdimm)
> > -             goto out;
> > -
> > -     rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
> > +     dev_set_drvdata(dev, nvdimm);
> > +     rc = devm_add_action_or_reset(dev, unregister_nvdimm, cxl_nvd);
>
> I think this ends up less readable than explicit devm handling of each part
> rather than combining them.
>
>
>         set_exclusive...()
>         rc = devm_add_action_or_rset(dev, unset_exclusive, cxlm);
>         if (rc)
>                 goto out;
>
>         nvidimm = nvdim_create()
>         if (!nvdimm) //return value looks dubious in old code but I've not checked it properly.
>                 goto out;
>
>         rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
>         if (rc)
>                 goto out;
>         dev_set_drvdata(dev, nvdimm);
>
> and two simpler unwinding functions doing just one thing each.

Taking it a step further, the unwind is so simple here, might as well
just have a typical cxl_nvdimm_remove() and drop the devm.
diff mbox series

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 73107b302224..6a5c4f3679ba 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -230,6 +230,7 @@  static bool cxl_mem_raw_command_allowed(u16 opcode)
  *  * %-EINVAL	- Reserved fields or invalid values were used.
  *  * %-ENOMEM	- Input or output buffer wasn't sized properly.
  *  * %-EPERM	- Attempted to use a protected command.
+ *  * %-EBUSY	- Kernel has claimed exclusive access to this opcode
  *
  * The result of this command is a fully validated command in @out_cmd that is
  * safe to send to the hardware.
@@ -305,6 +306,10 @@  static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
 	if (!test_bit(info->id, cxlm->enabled_cmds))
 		return -ENOTTY;
 
+	/* Check that the command is not claimed for exclusive kernel use */
+	if (test_bit(info->id, cxlm->exclusive_cmds))
+		return -EBUSY;
+
 	/* Check the input buffer is the expected size */
 	if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
 		return -ENOMEM;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index a2a9691568af..ee61202c7aab 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -134,6 +134,37 @@  static const struct device_type cxl_memdev_type = {
 	.groups = cxl_memdev_attribute_groups,
 };
 
+/**
+ * set_exclusive_cxl_commands() - atomically disable user cxl commands
+ * @cxlm: cxl_mem instance to modify
+ * @cmds: bitmap of commands to mark exclusive
+ *
+ * Flush the ioctl path and disable future execution of commands with
+ * the command ids set in @cmds.
+ */
+void set_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+{
+	down_write(&cxl_memdev_rwsem);
+	bitmap_or(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+		  CXL_MEM_COMMAND_ID_MAX);
+	up_write(&cxl_memdev_rwsem);
+}
+EXPORT_SYMBOL_GPL(set_exclusive_cxl_commands);
+
+/**
+ * clear_exclusive_cxl_commands() - atomically enable user cxl commands
+ * @cxlm: cxl_mem instance to modify
+ * @cmds: bitmap of commands to mark available for userspace
+ */
+void clear_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+{
+	down_write(&cxl_memdev_rwsem);
+	bitmap_andnot(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+		      CXL_MEM_COMMAND_ID_MAX);
+	up_write(&cxl_memdev_rwsem);
+}
+EXPORT_SYMBOL_GPL(clear_exclusive_cxl_commands);
+
 static void cxl_memdev_shutdown(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index df4f3636a999..45d5347b5d61 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -102,6 +102,7 @@  struct cxl_mbox_cmd {
  * @mbox_mutex: Mutex to synchronize mailbox access.
  * @firmware_version: Firmware version for the memory device.
  * @enabled_cmds: Hardware commands found enabled in CEL.
+ * @exclusive_cmds: Commands that are kernel-internal only
  * @pmem_range: Persistent memory capacity information.
  * @ram_range: Volatile memory capacity information.
  * @mbox_send: @dev specific transport for transmitting mailbox commands
@@ -117,6 +118,7 @@  struct cxl_mem {
 	struct mutex mbox_mutex; /* Protects device mailbox and firmware */
 	char firmware_version[0x10];
 	DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
+	DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
 
 	struct range pmem_range;
 	struct range ram_range;
@@ -190,4 +192,6 @@  int cxl_mem_identify(struct cxl_mem *cxlm);
 int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm);
 int cxl_mem_create_range_info(struct cxl_mem *cxlm);
 struct cxl_mem *cxl_mem_create(struct device *dev);
+void set_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds);
+void clear_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds);
 #endif /* __CXL_MEM_H__ */
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 9652c3ee41e7..469b984176a2 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -16,9 +16,21 @@ 
  */
 static struct workqueue_struct *cxl_pmem_wq;
 
-static void unregister_nvdimm(void *nvdimm)
+static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);
+
+static void unregister_nvdimm(void *_cxl_nvd)
 {
-	nvdimm_delete(nvdimm);
+	struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
+	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+	struct cxl_mem *cxlm = cxlmd->cxlm;
+	struct device *dev = &cxl_nvd->dev;
+	struct nvdimm *nvdimm;
+
+	nvdimm = dev_get_drvdata(dev);
+	if (nvdimm)
+		nvdimm_delete(nvdimm);
+
+	clear_exclusive_cxl_commands(cxlm, exclusive_cmds);
 }
 
 static int match_nvdimm_bridge(struct device *dev, const void *data)
@@ -39,9 +51,11 @@  static struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(void)
 static int cxl_nvdimm_probe(struct device *dev)
 {
 	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
+	struct cxl_mem *cxlm = cxlmd->cxlm;
 	struct cxl_nvdimm_bridge *cxl_nvb;
+	struct nvdimm *nvdimm = NULL;
 	unsigned long flags = 0;
-	struct nvdimm *nvdimm;
 	int rc = -ENXIO;
 
 	cxl_nvb = cxl_find_nvdimm_bridge();
@@ -52,17 +66,20 @@  static int cxl_nvdimm_probe(struct device *dev)
 	if (!cxl_nvb->nvdimm_bus)
 		goto out;
 
+	set_exclusive_cxl_commands(cxlm, exclusive_cmds);
+
 	set_bit(NDD_LABELING, &flags);
 	nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, 0, 0,
 			       NULL);
-	if (!nvdimm)
-		goto out;
-
-	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
+	dev_set_drvdata(dev, nvdimm);
+	rc = devm_add_action_or_reset(dev, unregister_nvdimm, cxl_nvd);
 out:
 	device_unlock(&cxl_nvb->dev);
 	put_device(&cxl_nvb->dev);
 
+	if (!nvdimm && rc == 0)
+		rc = -ENOMEM;
+
 	return rc;
 }
 
@@ -194,6 +211,10 @@  static __init int cxl_pmem_init(void)
 {
 	int rc;
 
+	set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds);
+	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
+	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);
+
 	cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
 	if (!cxl_pmem_wq)
 		return -ENXIO;