diff mbox series

[1/8] fwctl: Add basic structure for a class subsystem with a cdev

Message ID 1-v1-9912f1a11620+2a-fwctl_jgg@nvidia.com
State Superseded
Headers show
Series Introduce fwctl subystem | expand

Commit Message

Jason Gunthorpe June 3, 2024, 3:53 p.m. UTC
Create the class, character device and functions for a fwctl driver to
un/register to the subsystem.

A typical fwctl driver has a sysfs presence like:

$ ls -l /dev/fwctl/fwctl0
crw------- 1 root root 250, 0 Apr 25 19:16 /dev/fwctl/fwctl0

$ ls /sys/class/fwctl/fwctl0
dev  device  power  subsystem  uevent

$ ls /sys/class/fwctl/fwctl0/device/infiniband/
ibp0s10f0

$ ls /sys/class/infiniband/ibp0s10f0/device/fwctl/
fwctl0/

$ ls /sys/devices/pci0000:00/0000:00:0a.0/fwctl/fwctl0
dev  device  power  subsystem  uevent

Which allows userspace to link all the multi-subsystem driver components
together and learn the subsystem specific names for the device's
components.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 MAINTAINERS            |   8 ++
 drivers/Kconfig        |   2 +
 drivers/Makefile       |   1 +
 drivers/fwctl/Kconfig  |   9 +++
 drivers/fwctl/Makefile |   4 +
 drivers/fwctl/main.c   | 174 +++++++++++++++++++++++++++++++++++++++++
 include/linux/fwctl.h  |  68 ++++++++++++++++
 7 files changed, 266 insertions(+)
 create mode 100644 drivers/fwctl/Kconfig
 create mode 100644 drivers/fwctl/Makefile
 create mode 100644 drivers/fwctl/main.c
 create mode 100644 include/linux/fwctl.h

Comments

Leon Romanovsky June 4, 2024, 9:32 a.m. UTC | #1
On Mon, Jun 03, 2024 at 12:53:17PM -0300, Jason Gunthorpe wrote:
> Create the class, character device and functions for a fwctl driver to
> un/register to the subsystem.
> 
> A typical fwctl driver has a sysfs presence like:
> 
> $ ls -l /dev/fwctl/fwctl0
> crw------- 1 root root 250, 0 Apr 25 19:16 /dev/fwctl/fwctl0
> 
> $ ls /sys/class/fwctl/fwctl0
> dev  device  power  subsystem  uevent
> 
> $ ls /sys/class/fwctl/fwctl0/device/infiniband/
> ibp0s10f0
> 
> $ ls /sys/class/infiniband/ibp0s10f0/device/fwctl/
> fwctl0/
> 
> $ ls /sys/devices/pci0000:00/0000:00:0a.0/fwctl/fwctl0
> dev  device  power  subsystem  uevent
> 
> Which allows userspace to link all the multi-subsystem driver components
> together and learn the subsystem specific names for the device's
> components.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
> ---
>  MAINTAINERS            |   8 ++
>  drivers/Kconfig        |   2 +
>  drivers/Makefile       |   1 +
>  drivers/fwctl/Kconfig  |   9 +++
>  drivers/fwctl/Makefile |   4 +
>  drivers/fwctl/main.c   | 174 +++++++++++++++++++++++++++++++++++++++++
>  include/linux/fwctl.h  |  68 ++++++++++++++++
>  7 files changed, 266 insertions(+)
>  create mode 100644 drivers/fwctl/Kconfig
>  create mode 100644 drivers/fwctl/Makefile
>  create mode 100644 drivers/fwctl/main.c
>  create mode 100644 include/linux/fwctl.h

<...>

> +static struct fwctl_device *
> +_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
> +{
> +	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
> +
> +	if (!fwctl)
> +		return NULL;

<...>

> +/* Drivers use the fwctl_alloc_device() wrapper */
> +struct fwctl_device *_fwctl_alloc_device(struct device *parent,
> +					 const struct fwctl_ops *ops,
> +					 size_t size)
> +{
> +	struct fwctl_device *fwctl __free(fwctl) =
> +		_alloc_device(parent, ops, size);

I'm not a big fan of cleanup.h pattern as it hides important to me
information about memory object lifetime and by "solving" one class of
problems it creates another one.

You didn't check if fwctl is NULL before using it.

> +	int devnum;
> +
> +	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
> +	if (devnum < 0)
> +		return NULL;
> +	fwctl->dev.devt = fwctl_dev + devnum;
> +
> +	cdev_init(&fwctl->cdev, &fwctl_fops);
> +	fwctl->cdev.owner = THIS_MODULE;
> +
> +	if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))

Did you miss ida_free() here?

> +		return NULL;
> +
> +	fwctl->ops = ops;
> +	return_ptr(fwctl);
> +}
> +EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, FWCTL);

Thanks
Jason Gunthorpe June 4, 2024, 3:50 p.m. UTC | #2
On Tue, Jun 04, 2024 at 12:32:19PM +0300, Leon Romanovsky wrote:
> > +static struct fwctl_device *
> > +_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
> > +{
> > +	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
> > +
> > +	if (!fwctl)
> > +		return NULL;
> 
> <...>
> 
> > +/* Drivers use the fwctl_alloc_device() wrapper */
> > +struct fwctl_device *_fwctl_alloc_device(struct device *parent,
> > +					 const struct fwctl_ops *ops,
> > +					 size_t size)
> > +{
> > +	struct fwctl_device *fwctl __free(fwctl) =
> > +		_alloc_device(parent, ops, size);
> 
> I'm not a big fan of cleanup.h pattern as it hides important to me
> information about memory object lifetime and by "solving" one class of
> problems it creates another one.

I'm trying it here. One of the most common bugs I end up fixing is
error unwind and cleanup.h has successfully removed all of it. Let's
find out, others thought it was a good idea to add the infrastructure.

One thing that seems clear in my work here is that you should not use
cleanup.h if you don't have simple memory lifetime, like the above
case where the memory is freed if the function fails.

> You didn't check if fwctl is NULL before using it.

Oops, yes

> > +	int devnum;
> > +
> > +	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
> > +	if (devnum < 0)
> > +		return NULL;
> > +	fwctl->dev.devt = fwctl_dev + devnum;
> > +
> > +	cdev_init(&fwctl->cdev, &fwctl_fops);
> > +	fwctl->cdev.owner = THIS_MODULE;
> > +
> > +	if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))
> 
> Did you miss ida_free() here?

No, the put_device() does it in the release function. The __free
always calls fwctl_put()/put_device() on failure, and within all
functions except _alloc_device() the put_device() is the correct way
to free this memory.

Thanks,
Jason
Randy Dunlap June 4, 2024, 4:42 p.m. UTC | #3
On 6/3/24 8:53 AM, Jason Gunthorpe wrote:
> diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
> new file mode 100644
> index 00000000000000..6ceee3347ae642
> --- /dev/null
> +++ b/drivers/fwctl/Kconfig
> @@ -0,0 +1,9 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +menuconfig FWCTL
> +        tristate "fwctl device firmware access framework"

Use tab above instead of spaces for indentation, please.

> +	help
> +	  fwctl provides a userspace API for restricted access to communicate
> +	  with on-device firmware. The communication channel is intended to
> +	  support a wide range of lockdown compatible device behaviors including
> +	  manipulating device FLASH, debugging, and other activities that don't
> +	  fit neatly into an existing subsystem.
Jason Gunthorpe June 4, 2024, 4:44 p.m. UTC | #4
On Tue, Jun 04, 2024 at 09:42:50AM -0700, Randy Dunlap wrote:
> 
> 
> On 6/3/24 8:53 AM, Jason Gunthorpe wrote:
> > diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
> > new file mode 100644
> > index 00000000000000..6ceee3347ae642
> > --- /dev/null
> > +++ b/drivers/fwctl/Kconfig
> > @@ -0,0 +1,9 @@
> > +# SPDX-License-Identifier: GPL-2.0-only
> > +menuconfig FWCTL
> > +        tristate "fwctl device firmware access framework"
> 
> Use tab above instead of spaces for indentation, please.

Thanks, done. Bit surprised checkpatch didn't flag it..

Jason
Jonathan Cameron June 4, 2024, 5:05 p.m. UTC | #5
On Tue, 4 Jun 2024 12:50:09 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Tue, Jun 04, 2024 at 12:32:19PM +0300, Leon Romanovsky wrote:
> > > +static struct fwctl_device *
> > > +_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
> > > +{
> > > +	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
> > > +
> > > +	if (!fwctl)
> > > +		return NULL;  
> > 
> > <...>
> >   
> > > +/* Drivers use the fwctl_alloc_device() wrapper */
> > > +struct fwctl_device *_fwctl_alloc_device(struct device *parent,
> > > +					 const struct fwctl_ops *ops,
> > > +					 size_t size)
> > > +{
> > > +	struct fwctl_device *fwctl __free(fwctl) =
> > > +		_alloc_device(parent, ops, size);  
> > 
> > I'm not a big fan of cleanup.h pattern as it hides important to me
> > information about memory object lifetime and by "solving" one class of
> > problems it creates another one.  
> 
> I'm trying it here. One of the most common bugs I end up fixing is
> error unwind and cleanup.h has successfully removed all of it. Let's
> find out, others thought it was a good idea to add the infrastructure.
> 
> One thing that seems clear in my work here is that you should not use
> cleanup.h if you don't have simple memory lifetime, like the above
> case where the memory is freed if the function fails.
> 
> > You didn't check if fwctl is NULL before using it.  
> 
> Oops, yes
> 
> > > +	int devnum;
> > > +
> > > +	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
> > > +	if (devnum < 0)
> > > +		return NULL;
> > > +	fwctl->dev.devt = fwctl_dev + devnum;
> > > +
> > > +	cdev_init(&fwctl->cdev, &fwctl_fops);
> > > +	fwctl->cdev.owner = THIS_MODULE;
> > > +
> > > +	if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))  
> > 
> > Did you miss ida_free() here?  
> 
> No, the put_device() does it in the release function. The __free
> always calls fwctl_put()/put_device() on failure, and within all
> functions except _alloc_device() the put_device() is the correct way
> to free this memory.

The conditional handling of the ida having been allocated or not is a bit ugly
as I think it's just papering over this corner case.
Can fwctl_dev and devnum both be zero? In practice no, but is that guaranteed
for all time? Maybe...

We got some kick back from Linus a while back in CXL and the outcome was
a few more helpers rather than too much cleverness in the use of __free.

Trick for this is often to define a small function that allocates both the
ida and the device. With in that micro function handle the one error path
or if you only have two things to do, you can use __free() for the allocation.

Something like

static struct fwctl_device *__alloc_device_and_devt(sizet_t size)
{
	struct fw_ctl_device *fwctl;
	int devnum;

	fwctl = ida_alloc_max(&fwct ...);
	if (!fwctl)
		return NULL;

	devnum = ida_alloc_max(&fwct ...);
	if (devnum < 0) {
		kfree(fwctl);
		return NULL;
	}	

	fwctl->dev.devt = fwctl_Ddev + devnum;

	reutrn fwctl;
}

Then call device_initialize() on the returned structure ->dev as you know
you ida and the containing structure are both in a state where the put_device()
call doesn't need conditions on 'how initialized' it is.

Still, maybe the ugly is fine.  


> 
> Thanks,
> Jason
> 
>
Jason Gunthorpe June 4, 2024, 6:52 p.m. UTC | #6
On Tue, Jun 04, 2024 at 06:05:55PM +0100, Jonathan Cameron wrote:

> Trick for this is often to define a small function that allocates both the
> ida and the device. With in that micro function handle the one error path
> or if you only have two things to do, you can use __free() for the allocation.

This style is already followed here, the _alloc_device() is the
function that does everything before starting reference counting (IMHO
it is the best pattern to use). If we move the ida allocation to that
function then the if inside release is not needed.

Like this:

diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
index d25b5eb3aee73c..a26697326e6ced 100644
--- a/drivers/fwctl/main.c
+++ b/drivers/fwctl/main.c
@@ -267,8 +267,7 @@ static void fwctl_device_release(struct device *device)
 	struct fwctl_device *fwctl =
 		container_of(device, struct fwctl_device, dev);
 
-	if (fwctl->dev.devt)
-		ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
+	ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
 	mutex_destroy(&fwctl->uctx_list_lock);
 	kfree(fwctl);
 }
@@ -288,6 +287,7 @@ static struct fwctl_device *
 _alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
 {
 	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
+	int devnum;
 
 	if (!fwctl)
 		return NULL;
@@ -296,6 +296,12 @@ _alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
 	init_rwsem(&fwctl->registration_lock);
 	mutex_init(&fwctl->uctx_list_lock);
 	INIT_LIST_HEAD(&fwctl->uctx_list);
+
+	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
+	if (devnum < 0)
+		return NULL;
+	fwctl->dev.devt = fwctl_dev + devnum;
+
 	device_initialize(&fwctl->dev);
 	return_ptr(fwctl);
 }
@@ -307,16 +313,10 @@ struct fwctl_device *_fwctl_alloc_device(struct device *parent,
 {
 	struct fwctl_device *fwctl __free(fwctl) =
 		_alloc_device(parent, ops, size);
-	int devnum;
 
 	if (!fwctl)
 		return NULL;
 
-	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
-	if (devnum < 0)
-		return NULL;
-	fwctl->dev.devt = fwctl_dev + devnum;
-
 	cdev_init(&fwctl->cdev, &fwctl_fops);
 	fwctl->cdev.owner = THIS_MODULE;
 
Jason
Jonathan Cameron June 5, 2024, 11:08 a.m. UTC | #7
On Tue, 4 Jun 2024 15:52:00 -0300
Jason Gunthorpe <jgg@nvidia.com> wrote:

> On Tue, Jun 04, 2024 at 06:05:55PM +0100, Jonathan Cameron wrote:
> 
> > Trick for this is often to define a small function that allocates both the
> > ida and the device. With in that micro function handle the one error path
> > or if you only have two things to do, you can use __free() for the allocation.  
> 
> This style is already followed here, the _alloc_device() is the
> function that does everything before starting reference counting (IMHO
> it is the best pattern to use). If we move the ida allocation to that
> function then the if inside release is not needed.
> 
> Like this:

LGTM (this specific code, not commenting on fwctl in general yet as needs
more thinking time!)

> 
> diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
> index d25b5eb3aee73c..a26697326e6ced 100644
> --- a/drivers/fwctl/main.c
> +++ b/drivers/fwctl/main.c
> @@ -267,8 +267,7 @@ static void fwctl_device_release(struct device *device)
>  	struct fwctl_device *fwctl =
>  		container_of(device, struct fwctl_device, dev);
>  
> -	if (fwctl->dev.devt)
> -		ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
> +	ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
>  	mutex_destroy(&fwctl->uctx_list_lock);
>  	kfree(fwctl);
>  }
> @@ -288,6 +287,7 @@ static struct fwctl_device *
>  _alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
>  {
>  	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
> +	int devnum;
>  
>  	if (!fwctl)
>  		return NULL;
> @@ -296,6 +296,12 @@ _alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
>  	init_rwsem(&fwctl->registration_lock);
>  	mutex_init(&fwctl->uctx_list_lock);
>  	INIT_LIST_HEAD(&fwctl->uctx_list);
> +
> +	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
> +	if (devnum < 0)
> +		return NULL;
> +	fwctl->dev.devt = fwctl_dev + devnum;
> +
>  	device_initialize(&fwctl->dev);
>  	return_ptr(fwctl);
>  }
> @@ -307,16 +313,10 @@ struct fwctl_device *_fwctl_alloc_device(struct device *parent,
>  {
>  	struct fwctl_device *fwctl __free(fwctl) =
>  		_alloc_device(parent, ops, size);
> -	int devnum;
>  
>  	if (!fwctl)
>  		return NULL;
>  
> -	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
> -	if (devnum < 0)
> -		return NULL;
> -	fwctl->dev.devt = fwctl_dev + devnum;
> -
>  	cdev_init(&fwctl->cdev, &fwctl_fops);
>  	fwctl->cdev.owner = THIS_MODULE;
>  
> Jason
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 8754ac2c259dc9..833b853808421e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9077,6 +9077,14 @@  F:	kernel/futex/*
 F:	tools/perf/bench/futex*
 F:	tools/testing/selftests/futex/
 
+FWCTL SUBSYSTEM
+M:	Jason Gunthorpe <jgg@nvidia.com>
+M:	Saeed Mahameed <saeedm@nvidia.com>
+S:	Maintained
+F:	Documentation/userspace-api/fwctl.rst
+F:	drivers/fwctl/
+F:	include/linux/fwctl.h
+
 GALAXYCORE GC0308 CAMERA SENSOR DRIVER
 M:	Sebastian Reichel <sre@kernel.org>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7bdad836fc6207..7c556c5ac4fddc 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -21,6 +21,8 @@  source "drivers/connector/Kconfig"
 
 source "drivers/firmware/Kconfig"
 
+source "drivers/fwctl/Kconfig"
+
 source "drivers/gnss/Kconfig"
 
 source "drivers/mtd/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index fe9ceb0d2288ad..f6a241b747b29c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -133,6 +133,7 @@  obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/
 obj-y				+= firmware/
+obj-$(CONFIG_FWCTL)		+= fwctl/
 obj-$(CONFIG_CRYPTO)		+= crypto/
 obj-$(CONFIG_SUPERH)		+= sh/
 obj-y				+= clocksource/
diff --git a/drivers/fwctl/Kconfig b/drivers/fwctl/Kconfig
new file mode 100644
index 00000000000000..6ceee3347ae642
--- /dev/null
+++ b/drivers/fwctl/Kconfig
@@ -0,0 +1,9 @@ 
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig FWCTL
+        tristate "fwctl device firmware access framework"
+	help
+	  fwctl provides a userspace API for restricted access to communicate
+	  with on-device firmware. The communication channel is intended to
+	  support a wide range of lockdown compatible device behaviors including
+	  manipulating device FLASH, debugging, and other activities that don't
+	  fit neatly into an existing subsystem.
diff --git a/drivers/fwctl/Makefile b/drivers/fwctl/Makefile
new file mode 100644
index 00000000000000..1cad210f6ba580
--- /dev/null
+++ b/drivers/fwctl/Makefile
@@ -0,0 +1,4 @@ 
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_FWCTL) += fwctl.o
+
+fwctl-y += main.o
diff --git a/drivers/fwctl/main.c b/drivers/fwctl/main.c
new file mode 100644
index 00000000000000..ff9b7bad5a2b0d
--- /dev/null
+++ b/drivers/fwctl/main.c
@@ -0,0 +1,174 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define pr_fmt(fmt) "fwctl: " fmt
+#include <linux/fwctl.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/container_of.h>
+#include <linux/fs.h>
+
+enum {
+	FWCTL_MAX_DEVICES = 256,
+};
+static dev_t fwctl_dev;
+static DEFINE_IDA(fwctl_ida);
+
+static int fwctl_fops_open(struct inode *inode, struct file *filp)
+{
+	struct fwctl_device *fwctl =
+		container_of(inode->i_cdev, struct fwctl_device, cdev);
+
+	get_device(&fwctl->dev);
+	filp->private_data = fwctl;
+	return 0;
+}
+
+static int fwctl_fops_release(struct inode *inode, struct file *filp)
+{
+	struct fwctl_device *fwctl = filp->private_data;
+
+	fwctl_put(fwctl);
+	return 0;
+}
+
+static const struct file_operations fwctl_fops = {
+	.owner = THIS_MODULE,
+	.open = fwctl_fops_open,
+	.release = fwctl_fops_release,
+};
+
+static void fwctl_device_release(struct device *device)
+{
+	struct fwctl_device *fwctl =
+		container_of(device, struct fwctl_device, dev);
+
+	if (fwctl->dev.devt)
+		ida_free(&fwctl_ida, fwctl->dev.devt - fwctl_dev);
+	kfree(fwctl);
+}
+
+static char *fwctl_devnode(const struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "fwctl/%s", dev_name(dev));
+}
+
+static struct class fwctl_class = {
+	.name = "fwctl",
+	.dev_release = fwctl_device_release,
+	.devnode = fwctl_devnode,
+};
+
+static struct fwctl_device *
+_alloc_device(struct device *parent, const struct fwctl_ops *ops, size_t size)
+{
+	struct fwctl_device *fwctl __free(kfree) = kzalloc(size, GFP_KERNEL);
+
+	if (!fwctl)
+		return NULL;
+	fwctl->dev.class = &fwctl_class;
+	fwctl->dev.parent = parent;
+	device_initialize(&fwctl->dev);
+	return_ptr(fwctl);
+}
+
+/* Drivers use the fwctl_alloc_device() wrapper */
+struct fwctl_device *_fwctl_alloc_device(struct device *parent,
+					 const struct fwctl_ops *ops,
+					 size_t size)
+{
+	struct fwctl_device *fwctl __free(fwctl) =
+		_alloc_device(parent, ops, size);
+	int devnum;
+
+	devnum = ida_alloc_max(&fwctl_ida, FWCTL_MAX_DEVICES - 1, GFP_KERNEL);
+	if (devnum < 0)
+		return NULL;
+	fwctl->dev.devt = fwctl_dev + devnum;
+
+	cdev_init(&fwctl->cdev, &fwctl_fops);
+	fwctl->cdev.owner = THIS_MODULE;
+
+	if (dev_set_name(&fwctl->dev, "fwctl%d", fwctl->dev.devt - fwctl_dev))
+		return NULL;
+
+	fwctl->ops = ops;
+	return_ptr(fwctl);
+}
+EXPORT_SYMBOL_NS_GPL(_fwctl_alloc_device, FWCTL);
+
+/**
+ * fwctl_register - Register a new device to the subsystem
+ * @fwctl: Previously allocated fwctl_device
+ *
+ * On return the device is visible through sysfs and /dev, driver ops may be
+ * called.
+ */
+int fwctl_register(struct fwctl_device *fwctl)
+{
+	int ret;
+
+	ret = cdev_device_add(&fwctl->cdev, &fwctl->dev);
+	if (ret)
+		return ret;
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_register, FWCTL);
+
+/**
+ * fwctl_unregister - Unregister a device from the subsystem
+ * @fwctl: Previously allocated and registered fwctl_device
+ *
+ * Undoes fwctl_register(). On return no driver ops will be called. The
+ * caller must still call fwctl_put() to free the fwctl.
+ *
+ * Unregister will return even if userspace still has file descriptors open.
+ * This will call ops->close_uctx() on any open FDs and after return no driver
+ * op will be called. The FDs remain open but all fops will return -ENODEV.
+ *
+ * The design of fwctl allows this sort of disassociation of the driver from the
+ * subsystem primarily by keeping memory allocations owned by the core subsytem.
+ * The fwctl_device and fwctl_uctx can both be freed without requiring a driver
+ * callback. This allows the module to remain unlocked while FDs are open.
+ */
+void fwctl_unregister(struct fwctl_device *fwctl)
+{
+	cdev_device_del(&fwctl->cdev, &fwctl->dev);
+
+	/*
+	 * The driver module may unload after this returns, the op pointer will
+	 * not be valid.
+	 */
+	fwctl->ops = NULL;
+}
+EXPORT_SYMBOL_NS_GPL(fwctl_unregister, FWCTL);
+
+static int __init fwctl_init(void)
+{
+	int ret;
+
+	ret = alloc_chrdev_region(&fwctl_dev, 0, FWCTL_MAX_DEVICES, "fwctl");
+	if (ret)
+		return ret;
+
+	ret = class_register(&fwctl_class);
+	if (ret)
+		goto err_chrdev;
+	return 0;
+
+err_chrdev:
+	unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+	return ret;
+}
+
+static void __exit fwctl_exit(void)
+{
+	class_unregister(&fwctl_class);
+	unregister_chrdev_region(fwctl_dev, FWCTL_MAX_DEVICES);
+}
+
+module_init(fwctl_init);
+module_exit(fwctl_exit);
+MODULE_DESCRIPTION("fwctl device firmware access framework");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/fwctl.h b/include/linux/fwctl.h
new file mode 100644
index 00000000000000..ef4eaa87c945e4
--- /dev/null
+++ b/include/linux/fwctl.h
@@ -0,0 +1,68 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __LINUX_FWCTL_H
+#define __LINUX_FWCTL_H
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/cleanup.h>
+
+struct fwctl_device;
+struct fwctl_uctx;
+
+struct fwctl_ops {
+};
+
+/**
+ * struct fwctl_device - Per-driver registration struct
+ * @dev: The sysfs (class/fwctl/fwctlXX) device
+ *
+ * Each driver instance will have one of these structs with the driver
+ * private data following immeidately after. This struct is refcounted,
+ * it is freed by calling fwctl_put().
+ */
+struct fwctl_device {
+	struct device dev;
+	/* private: */
+	struct cdev cdev;
+	const struct fwctl_ops *ops;
+};
+
+struct fwctl_device *_fwctl_alloc_device(struct device *parent,
+					 const struct fwctl_ops *ops,
+					 size_t size);
+/**
+ * fwctl_alloc_device - Allocate a fwctl
+ * @parent: Physical device that provides the FW interface
+ * @ops: Driver ops to register
+ * @drv_struct: 'struct driver_fwctl' that holds the struct fwctl_device
+ * @member: Name of the struct fwctl_device in @drv_struct
+ *
+ * This allocates and initializes the fwctl_device embedded in the drv_struct.
+ * Upon success the pointer must be freed via fwctl_put(). Returns NULL on
+ * failure. Returns a 'drv_struct *' on success, NULL on error.
+ */
+#define fwctl_alloc_device(parent, ops, drv_struct, member)                  \
+	container_of(_fwctl_alloc_device(                                    \
+			     parent, ops,                                    \
+			     sizeof(drv_struct) +                            \
+				     BUILD_BUG_ON_ZERO(                      \
+					     offsetof(drv_struct, member))), \
+		     drv_struct, member)
+
+static inline struct fwctl_device *fwctl_get(struct fwctl_device *fwctl)
+{
+	get_device(&fwctl->dev);
+	return fwctl;
+}
+static inline void fwctl_put(struct fwctl_device *fwctl)
+{
+	put_device(&fwctl->dev);
+}
+DEFINE_FREE(fwctl, struct fwctl_device *, if (_T) fwctl_put(_T));
+
+int fwctl_register(struct fwctl_device *fwctl);
+void fwctl_unregister(struct fwctl_device *fwctl);
+
+#endif