diff mbox series

[RFC,3/6] cache: coherency device class

Message ID 20250320174118.39173-4-Jonathan.Cameron@huawei.com (mailing list archive)
State New
Headers show
Series Cache coherency management subsystem | expand

Commit Message

Jonathan Cameron March 20, 2025, 5:41 p.m. UTC
Some systems contain devices that are capable of issuing certain cache
invalidation messages to all components in the system.  A typical use
is when the memory backing a physical address is being changed, such
as when a region is configured for CXL or a dynamic capacity event
occurs.

These perform a similar action to the WBINVD instruction on x86 but
may provide finer granularity and/or a richer set of actions.

Add a tiny device class to which drivers may register. Each driver
provides an ops structure and the first op is Write Back and Invalidate
by PA Range. The driver may over invalidate.

An optional completion check is also provided. If present that should
be called to ensure that the action has finished.

If multiple agents are present in the system each should register
with this class and the core code will issue the invalidate to all
of them before checking for completion on each. This is done
to avoid need for filtering in the core code which can become complex
when interleave, potentially across different cache coherency hardware,
is going on, so it is easier to tell everyone and let those who don't
care do nothing.

RFC question:
1. Location.  Conor, do you mind us putting this in drivers/cache?
   I'm more than happy to add a maintainers entry and not make this
   your problem!
2. Need to figure out if all agents have turned up yet?  No idea how
   to ensure this.
3. Is just iterating over devices registered with the class the
   simplest solution?
4. Given we have a nice device class, to which we can add sysfs
   attributes, what info might be useful to userspace? It might
   be useful to indicate how expensive a flush might be for example
   or where in the topology of the system it is being triggered?

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/cache/Kconfig           |   6 ++
 drivers/cache/Makefile          |   2 +
 drivers/cache/coherency_core.c  | 130 ++++++++++++++++++++++++++++++++
 include/linux/cache_coherency.h |  60 +++++++++++++++
 4 files changed, 198 insertions(+)

Comments

Greg KH March 20, 2025, 9:12 p.m. UTC | #1
On Thu, Mar 20, 2025 at 05:41:15PM +0000, Jonathan Cameron wrote:
> --- a/drivers/cache/Kconfig
> +++ b/drivers/cache/Kconfig
> @@ -1,6 +1,12 @@
>  # SPDX-License-Identifier: GPL-2.0
>  menu "Cache Drivers"
>  
> +config CACHE_COHERENCY_CLASS
> +	bool "Cache coherency control class"

Why can't this be a module?  And why would anyone want to turn it off?

> +	help
> +	  Class to which coherency control drivers register allowing core kernel
> +	  subsystems to issue invalidations and similar coherency operations.

What "core kernel subsystems"?

> +
>  config AX45MP_L2_CACHE
>  	bool "Andes Technology AX45MP L2 Cache controller"
>  	depends on RISCV

Shouldn't all of these now depend on CACHE_COHERENCY_CLASS?

> diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile
> index 55c5e851034d..b72b20f4248f 100644
> --- a/drivers/cache/Makefile
> +++ b/drivers/cache/Makefile
> @@ -3,3 +3,5 @@
>  obj-$(CONFIG_AX45MP_L2_CACHE)		+= ax45mp_cache.o
>  obj-$(CONFIG_SIFIVE_CCACHE)		+= sifive_ccache.o
>  obj-$(CONFIG_STARFIVE_STARLINK_CACHE)	+= starfive_starlink_cache.o
> +
> +obj-$(CONFIG_CACHE_COHERENCY_CLASS)	+= coherency_core.o

Why the blank line?

> diff --git a/drivers/cache/coherency_core.c b/drivers/cache/coherency_core.c
> new file mode 100644
> index 000000000000..52cb4ceae00c
> --- /dev/null
> +++ b/drivers/cache/coherency_core.c
> @@ -0,0 +1,130 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Class to manage OS controlled coherency agents within the system.
> + * Specifically to enable operations such as write back and invalidate.
> + *
> + * Copyright: Huawei 2025
> + * Some elements based on fwctl class as an example of a modern
> + * lightweight class.
> + */
> +
> +#include <linux/cache_coherency.h>
> +#include <linux/container_of.h>
> +#include <linux/idr.h>
> +#include <linux/fs.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +
> +#include <asm/cacheflush.h>
> +
> +static DEFINE_IDA(cache_coherency_ida);
> +
> +static void cache_coherency_device_release(struct device *device)
> +{
> +	struct cache_coherency_device *ccd =
> +		container_of(device, struct cache_coherency_device, dev);
> +
> +	ida_free(&cache_coherency_ida, ccd->id);
> +}
> +
> +static struct class cache_coherency_class = {
> +	.name = "cache_coherency",
> +	.dev_release = cache_coherency_device_release,
> +};
> +
> +static int cache_inval_one(struct device *dev, void *data)
> +{
> +	struct cache_coherency_device *ccd =
> +		container_of(dev, struct cache_coherency_device, dev);
> +
> +	if (!ccd->ops)
> +		return -EINVAL;
> +
> +	return ccd->ops->wbinv(ccd, data);
> +}
> +
> +static int cache_inval_done_one(struct device *dev, void *data)
> +{
> +	struct cache_coherency_device *ccd =
> +		container_of(dev, struct cache_coherency_device, dev);
> +	if (!ccd->ops)
> +		return -EINVAL;
> +
> +	return ccd->ops->done(ccd);
> +}
> +
> +static int cache_invalidate_memregion(int res_desc,
> +				      phys_addr_t addr, size_t size)
> +{
> +	int ret;
> +	struct cc_inval_params params = {
> +		.addr = addr,
> +		.size = size,
> +	};
> +
> +	ret = class_for_each_device(&cache_coherency_class, NULL, &params,
> +				    cache_inval_one);
> +	if (ret)
> +		return ret;
> +
> +	return class_for_each_device(&cache_coherency_class, NULL, NULL,
> +				     cache_inval_done_one);
> +}
> +
> +static const struct system_cache_flush_method cache_flush_method = {
> +	.invalidate_memregion = cache_invalidate_memregion,
> +};
> +
> +struct cache_coherency_device *
> +_cache_coherency_alloc_device(struct device *parent,
> +			      const struct coherency_ops *ops, size_t size)
> +{
> +
> +	if (!ops || !ops->wbinv)
> +		return NULL;
> +
> +	struct cache_coherency_device *ccd __free(kfree) = kzalloc(size, GFP_KERNEL);
> +
> +	if (!ccd)
> +		return NULL;
> +
> +	ccd->dev.class = &cache_coherency_class;
> +	ccd->dev.parent = parent;
> +	ccd->ops = ops;
> +	ccd->id = ida_alloc(&cache_coherency_ida, GFP_KERNEL);
> +
> +	if (dev_set_name(&ccd->dev, "cache_coherency%d", ccd->id))
> +		return NULL;
> +
> + 	device_initialize(&ccd->dev);
> +
> +	return_ptr(ccd);
> +}
> +EXPORT_SYMBOL_NS_GPL(_cache_coherency_alloc_device, "CACHE_COHERENCY");
> +
> +int cache_coherency_device_register(struct cache_coherency_device *ccd)
> +{
> +	return device_add(&ccd->dev);
> +}
> +EXPORT_SYMBOL_NS_GPL(cache_coherency_device_register, "CACHE_COHERENCY");
> +
> +void cache_coherency_device_unregister(struct cache_coherency_device *ccd)
> +{
> +	device_del(&ccd->dev);
> +}
> +EXPORT_SYMBOL_NS_GPL(cache_coherency_device_unregister, "CACHE_COHERENCY");
> +
> +static int __init cache_coherency_init(void)
> +{
> +	int ret;
> +
> +	ret = class_register(&cache_coherency_class);
> +	if (ret)
> +		return ret;
> +
> +	//TODO: generalize
> +	arm64_set_sys_cache_flush_method(&cache_flush_method);

I'm guessing this will blow up the build on non-x86 builds :)

> +struct cache_coherency_device {
> +	struct device dev;
> +	const struct coherency_ops *ops;
> +	int id;
> +};

Classes are normally for user/kernel apis, what is this going to be used
for?  I don't see any new user/kernel apis happening, so why do you need
a struct device to be created?

thanks,

greg k-h
Jonathan Cameron March 21, 2025, 9:38 a.m. UTC | #2
On Thu, 20 Mar 2025 14:12:15 -0700
Greg KH <gregkh@linuxfoundation.org> wrote:

> On Thu, Mar 20, 2025 at 05:41:15PM +0000, Jonathan Cameron wrote:
> > --- a/drivers/cache/Kconfig
> > +++ b/drivers/cache/Kconfig
> > @@ -1,6 +1,12 @@
> >  # SPDX-License-Identifier: GPL-2.0
> >  menu "Cache Drivers"
> >  
> > +config CACHE_COHERENCY_CLASS
> > +	bool "Cache coherency control class"  
> 
> Why can't this be a module?  And why would anyone want to turn it off?

If you don't have the hardware you won't want the infrastructure.
I'll add a note.  If you do have the hardware and don't have memory subsystems
that need to use it (maybe no CXL hardware for instance and that's the only
user on a particular platform).

> 
> > +	help
> > +	  Class to which coherency control drivers register allowing core kernel
> > +	  subsystems to issue invalidations and similar coherency operations.  
> 
> What "core kernel subsystems".

I'll expand on that.  Memory hotplug related stuff (currently CXL and NVDIMM)
but the thing is more general that that.

> 
> > +
> >  config AX45MP_L2_CACHE
> >  	bool "Andes Technology AX45MP L2 Cache controller"
> >  	depends on RISCV  
> 
> Shouldn't all of these now depend on CACHE_COHERENCY_CLASS?

Nope. They are unrelated existing cache related drivers.   The question
to Conor is whether he minds me putting this in the existing directory
and to others on whether it's a good idea.

> 
> > diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile
> > index 55c5e851034d..b72b20f4248f 100644
> > --- a/drivers/cache/Makefile
> > +++ b/drivers/cache/Makefile
> > @@ -3,3 +3,5 @@
> >  obj-$(CONFIG_AX45MP_L2_CACHE)		+= ax45mp_cache.o
> >  obj-$(CONFIG_SIFIVE_CCACHE)		+= sifive_ccache.o
> >  obj-$(CONFIG_STARFIVE_STARLINK_CACHE)	+= starfive_starlink_cache.o
> > +
> > +obj-$(CONFIG_CACHE_COHERENCY_CLASS)	+= coherency_core.o  
> 
> Why the blank line?

To separate existing stuff that happens to be cache related from this new
class.  Kind of camping in a directory because seemed silly to have
drivers/cache and drivers/cache_coherency


> 
> > diff --git a/drivers/cache/coherency_core.c b/drivers/cache/coherency_core.c
> > new file mode 100644
> > index 000000000000..52cb4ceae00c
> > --- /dev/null
> > +++ b/drivers/cache/coherency_core.c
> > @@ -0,0 +1,130 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Class to manage OS controlled coherency agents within the system.
> > + * Specifically to enable operations such as write back and invalidate.
> > + *
> > + * Copyright: Huawei 2025
> > + * Some elements based on fwctl class as an example of a modern
> > + * lightweight class.
> > + */
> > +
> > +#include <linux/cache_coherency.h>
> > +#include <linux/container_of.h>
> > +#include <linux/idr.h>
> > +#include <linux/fs.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +
> > +#include <asm/cacheflush.h>
> > +
> > +static DEFINE_IDA(cache_coherency_ida);
> > +
> > +static void cache_coherency_device_release(struct device *device)
> > +{
> > +	struct cache_coherency_device *ccd =
> > +		container_of(device, struct cache_coherency_device, dev);
> > +
> > +	ida_free(&cache_coherency_ida, ccd->id);
> > +}
> > +
> > +static struct class cache_coherency_class = {
> > +	.name = "cache_coherency",
> > +	.dev_release = cache_coherency_device_release,
> > +};
> > +
> > +static int cache_inval_one(struct device *dev, void *data)
> > +{
> > +	struct cache_coherency_device *ccd =
> > +		container_of(dev, struct cache_coherency_device, dev);
> > +
> > +	if (!ccd->ops)
> > +		return -EINVAL;
> > +
> > +	return ccd->ops->wbinv(ccd, data);
> > +}
> > +
> > +static int cache_inval_done_one(struct device *dev, void *data)
> > +{
> > +	struct cache_coherency_device *ccd =
> > +		container_of(dev, struct cache_coherency_device, dev);
> > +	if (!ccd->ops)
> > +		return -EINVAL;
> > +
> > +	return ccd->ops->done(ccd);
> > +}
> > +
> > +static int cache_invalidate_memregion(int res_desc,
> > +				      phys_addr_t addr, size_t size)
> > +{
> > +	int ret;
> > +	struct cc_inval_params params = {
> > +		.addr = addr,
> > +		.size = size,
> > +	};
> > +
> > +	ret = class_for_each_device(&cache_coherency_class, NULL, &params,
> > +				    cache_inval_one);
> > +	if (ret)
> > +		return ret;
> > +
> > +	return class_for_each_device(&cache_coherency_class, NULL, NULL,
> > +				     cache_inval_done_one);
> > +}
> > +
> > +static const struct system_cache_flush_method cache_flush_method = {
> > +	.invalidate_memregion = cache_invalidate_memregion,
> > +};
> > +
> > +struct cache_coherency_device *
> > +_cache_coherency_alloc_device(struct device *parent,
> > +			      const struct coherency_ops *ops, size_t size)
> > +{
> > +
> > +	if (!ops || !ops->wbinv)
> > +		return NULL;
> > +
> > +	struct cache_coherency_device *ccd __free(kfree) = kzalloc(size, GFP_KERNEL);
> > +
> > +	if (!ccd)
> > +		return NULL;
> > +
> > +	ccd->dev.class = &cache_coherency_class;
> > +	ccd->dev.parent = parent;
> > +	ccd->ops = ops;
> > +	ccd->id = ida_alloc(&cache_coherency_ida, GFP_KERNEL);
> > +
> > +	if (dev_set_name(&ccd->dev, "cache_coherency%d", ccd->id))
> > +		return NULL;
> > +
> > + 	device_initialize(&ccd->dev);
> > +
> > +	return_ptr(ccd);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(_cache_coherency_alloc_device, "CACHE_COHERENCY");
> > +
> > +int cache_coherency_device_register(struct cache_coherency_device *ccd)
> > +{
> > +	return device_add(&ccd->dev);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cache_coherency_device_register, "CACHE_COHERENCY");
> > +
> > +void cache_coherency_device_unregister(struct cache_coherency_device *ccd)
> > +{
> > +	device_del(&ccd->dev);
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cache_coherency_device_unregister, "CACHE_COHERENCY");
> > +
> > +static int __init cache_coherency_init(void)
> > +{
> > +	int ret;
> > +
> > +	ret = class_register(&cache_coherency_class);
> > +	if (ret)
> > +		return ret;
> > +
> > +	//TODO: generalize
> > +	arm64_set_sys_cache_flush_method(&cache_flush_method);  
> 
> I'm guessing this will blow up the build on non-x86 builds :)

Yup. That's a TODO That needs fixing.

> 
> > +struct cache_coherency_device {
> > +	struct device dev;
> > +	const struct coherency_ops *ops;
> > +	int id;
> > +};  
> 
> Classes are normally for user/kernel apis, what is this going to be used
> for?  I don't see any new user/kernel apis happening, so why do you need
> a struct device to be created?

I'm kind of expecting to grow some userspace ABI around a few things but
indeed there isn't any yet.  Stuff that has been suggested is:
* Descriptive stuff useful for performance estimation.
* Cache locking - as kind of the opposite of flushes.
* Maybe more direct user interfaces to control it (I'm wary of that though).

Mostly thought, the idea was  avoid rolling my own similar registration
infrastructure for this case.  Absolutely can do a subsystem without
a class if that seems to be the way to go. It'll be a little more complex
though.

Thanks,

Jonathan

> 
> thanks,
> 
> greg k-h
>
diff mbox series

Patch

diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig
index db51386c663a..45dc4b2935e7 100644
--- a/drivers/cache/Kconfig
+++ b/drivers/cache/Kconfig
@@ -1,6 +1,12 @@ 
 # SPDX-License-Identifier: GPL-2.0
 menu "Cache Drivers"
 
+config CACHE_COHERENCY_CLASS
+	bool "Cache coherency control class"
+	help
+	  Class to which coherency control drivers register allowing core kernel
+	  subsystems to issue invalidations and similar coherency operations.
+
 config AX45MP_L2_CACHE
 	bool "Andes Technology AX45MP L2 Cache controller"
 	depends on RISCV
diff --git a/drivers/cache/Makefile b/drivers/cache/Makefile
index 55c5e851034d..b72b20f4248f 100644
--- a/drivers/cache/Makefile
+++ b/drivers/cache/Makefile
@@ -3,3 +3,5 @@ 
 obj-$(CONFIG_AX45MP_L2_CACHE)		+= ax45mp_cache.o
 obj-$(CONFIG_SIFIVE_CCACHE)		+= sifive_ccache.o
 obj-$(CONFIG_STARFIVE_STARLINK_CACHE)	+= starfive_starlink_cache.o
+
+obj-$(CONFIG_CACHE_COHERENCY_CLASS)	+= coherency_core.o
diff --git a/drivers/cache/coherency_core.c b/drivers/cache/coherency_core.c
new file mode 100644
index 000000000000..52cb4ceae00c
--- /dev/null
+++ b/drivers/cache/coherency_core.c
@@ -0,0 +1,130 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Class to manage OS controlled coherency agents within the system.
+ * Specifically to enable operations such as write back and invalidate.
+ *
+ * Copyright: Huawei 2025
+ * Some elements based on fwctl class as an example of a modern
+ * lightweight class.
+ */
+
+#include <linux/cache_coherency.h>
+#include <linux/container_of.h>
+#include <linux/idr.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+
+static DEFINE_IDA(cache_coherency_ida);
+
+static void cache_coherency_device_release(struct device *device)
+{
+	struct cache_coherency_device *ccd =
+		container_of(device, struct cache_coherency_device, dev);
+
+	ida_free(&cache_coherency_ida, ccd->id);
+}
+
+static struct class cache_coherency_class = {
+	.name = "cache_coherency",
+	.dev_release = cache_coherency_device_release,
+};
+
+static int cache_inval_one(struct device *dev, void *data)
+{
+	struct cache_coherency_device *ccd =
+		container_of(dev, struct cache_coherency_device, dev);
+
+	if (!ccd->ops)
+		return -EINVAL;
+
+	return ccd->ops->wbinv(ccd, data);
+}
+
+static int cache_inval_done_one(struct device *dev, void *data)
+{
+	struct cache_coherency_device *ccd =
+		container_of(dev, struct cache_coherency_device, dev);
+	if (!ccd->ops)
+		return -EINVAL;
+
+	return ccd->ops->done(ccd);
+}
+
+static int cache_invalidate_memregion(int res_desc,
+				      phys_addr_t addr, size_t size)
+{
+	int ret;
+	struct cc_inval_params params = {
+		.addr = addr,
+		.size = size,
+	};
+
+	ret = class_for_each_device(&cache_coherency_class, NULL, &params,
+				    cache_inval_one);
+	if (ret)
+		return ret;
+
+	return class_for_each_device(&cache_coherency_class, NULL, NULL,
+				     cache_inval_done_one);
+}
+
+static const struct system_cache_flush_method cache_flush_method = {
+	.invalidate_memregion = cache_invalidate_memregion,
+};
+
+struct cache_coherency_device *
+_cache_coherency_alloc_device(struct device *parent,
+			      const struct coherency_ops *ops, size_t size)
+{
+
+	if (!ops || !ops->wbinv)
+		return NULL;
+
+	struct cache_coherency_device *ccd __free(kfree) = kzalloc(size, GFP_KERNEL);
+
+	if (!ccd)
+		return NULL;
+
+	ccd->dev.class = &cache_coherency_class;
+	ccd->dev.parent = parent;
+	ccd->ops = ops;
+	ccd->id = ida_alloc(&cache_coherency_ida, GFP_KERNEL);
+
+	if (dev_set_name(&ccd->dev, "cache_coherency%d", ccd->id))
+		return NULL;
+
+ 	device_initialize(&ccd->dev);
+
+	return_ptr(ccd);
+}
+EXPORT_SYMBOL_NS_GPL(_cache_coherency_alloc_device, "CACHE_COHERENCY");
+
+int cache_coherency_device_register(struct cache_coherency_device *ccd)
+{
+	return device_add(&ccd->dev);
+}
+EXPORT_SYMBOL_NS_GPL(cache_coherency_device_register, "CACHE_COHERENCY");
+
+void cache_coherency_device_unregister(struct cache_coherency_device *ccd)
+{
+	device_del(&ccd->dev);
+}
+EXPORT_SYMBOL_NS_GPL(cache_coherency_device_unregister, "CACHE_COHERENCY");
+
+static int __init cache_coherency_init(void)
+{
+	int ret;
+
+	ret = class_register(&cache_coherency_class);
+	if (ret)
+		return ret;
+
+	//TODO: generalize
+	arm64_set_sys_cache_flush_method(&cache_flush_method);
+
+	return 0;
+}
+subsys_initcall(cache_coherency_init);
diff --git a/include/linux/cache_coherency.h b/include/linux/cache_coherency.h
new file mode 100644
index 000000000000..d49be27ad4f1
--- /dev/null
+++ b/include/linux/cache_coherency.h
@@ -0,0 +1,60 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Cache coherency device drivers
+ *
+ * Copyright Huawei 2025
+ */
+#ifndef _LINUX_CACHE_COHERENCY_H_
+#define _LINUX_CACHE_COHERENCY_H_
+
+#include <linux/cleanup.h>
+#include <linux/device.h>
+
+struct cache_coherency_device;
+struct cc_inval_params {
+	phys_addr_t addr;
+	size_t size;
+};
+struct coherency_ops {
+	int (*wbinv)(struct cache_coherency_device *ccd, struct cc_inval_params *invp);
+	int (*done)(struct cache_coherency_device *ccd);
+};
+
+struct cache_coherency_device {
+	struct device dev;
+	const struct coherency_ops *ops;
+	int id;
+};
+
+int cache_coherency_device_register(struct cache_coherency_device *ccd);
+void cache_coherency_device_unregister(struct cache_coherency_device *ccd);
+
+struct cache_coherency_device *
+_cache_coherency_alloc_device(struct device *parent,
+			      const struct coherency_ops *ops, size_t size);
+
+#define cache_coherency_alloc_device(parent, ops, drv_struct, member)		\
+	({									\
+		static_assert(__same_type(struct cache_coherency_device,	\
+					  ((drv_struct *)NULL)->member));	\
+		static_assert(offsetof(drv_struct, member) == 0);		\
+		(drv_struct *)_cache_coherency_alloc_device(parent, ops,	\
+							sizeof(drv_struct));	\
+	})
+
+static inline struct cache_coherency_device *
+	cache_coherency_device_get(struct cache_coherency_device *ccd)
+{
+	get_device(&ccd->dev);
+	return ccd;
+}
+
+static inline void cache_coherency_device_put(struct cache_coherency_device *ccd)
+{
+	put_device(&ccd->dev);
+}
+
+DEFINE_FREE(cache_coherency_device, struct cache_coherency_device *,
+	    if (_T) cache_coherency_device_put(_T));
+
+#endif