Message ID | 20250324100008.346009-2-daniel.lezcano@linaro.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/2] dt-bindings: NXP System Timer Module | expand |
On 3/24/2025 12:00 PM, Daniel Lezcano wrote: > STM supports commonly required system and application software timing > functions. STM includes a 32-bit count-up timer and four 32-bit > compare channels with a separate interrupt source for each > channel. The timer is driven by the STM module clock divided by an > 8-bit prescale value (1 to 256). > > STM has the following features: > • One 32-bit count-up timer with an 8-bit prescaler > • Four 32-bit compare channels > • An independent interrupt source for each channel > • Ability to stop the timer in Debug mode > > The s32g platform comes in two versions, the s32g2 and the > s32g3. The former has an STM block with 8 timers and the latter has 12 > timers. > > There is a special STM instance called STM_TS which is dedicated to > the timestamp. The 7th STM instance STM_07 is directly tied to the > STM_TS which means it is not usable as a clockevent. > > This driver provides the core code to support both platforms but only > the s32g2 is configured. Adding the s32g3 STM support is > straightforward. > > The first probed STM is used as a clocksource, the second will be the > broadcast timer and the rest are used as a clockevent with the > affinity set to a CPU. The rating is higher than the ARM architected > timers, so if they are enabled in the kernel configuration, they will > take over and be used in place of the architected timers. The platform > data is used to specify if a clocksource, a broadcast clockevent or a > per-cpu clockevent is desired thus allowing more flexibility in the > future to configure the STMs on the system. 
> > Cc: Thomas Fossati <thomas.fossati@linaro.org> > Co-developed-by: Larisa Grigore <Larisa.Grigore@nxp.com> > Signed-off-by: Larisa Grigore <Larisa.Grigore@nxp.com> > Co-developed-by: Ghennadi Procopciuc <ghennadi.procopciuc@nxp.com> > Signed-off-by: Ghennadi Procopciuc <ghennadi.procopciuc@nxp.com> > Co-developed-by: Daniel Lezcano <daniel.lezcano@linaro.org> > Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> > --- > drivers/clocksource/Kconfig | 9 + > drivers/clocksource/Makefile | 2 + > drivers/clocksource/timer-nxp-stm.c | 524 ++++++++++++++++++++++++++++ > 3 files changed, 535 insertions(+) > create mode 100644 drivers/clocksource/timer-nxp-stm.c > > diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig > index 487c85259967..e86e327392af 100644 > --- a/drivers/clocksource/Kconfig > +++ b/drivers/clocksource/Kconfig > @@ -763,4 +763,13 @@ config RALINK_TIMER > Enables support for system tick counter present on > Ralink SoCs RT3352 and MT7620. > > +config NXP_STM_TIMER > + bool "NXP System Timer Module driver" > + depends on ARCH_S32 || COMPILE_TEST > + select CLKSRC_MMIO > + help > + Support for NXP System Timer Module. It will create, in this > + order, a clocksource, a broadcast clockevent and a per cpu > + clockevent. 
> + > endmenu > diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile > index 43ef16a4efa6..c3a92e6b9f94 100644 > --- a/drivers/clocksource/Makefile > +++ b/drivers/clocksource/Makefile > @@ -92,3 +92,5 @@ obj-$(CONFIG_GXP_TIMER) += timer-gxp.o > obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o > obj-$(CONFIG_EP93XX_TIMER) += timer-ep93xx.o > obj-$(CONFIG_RALINK_TIMER) += timer-ralink.o > +obj-$(CONFIG_NXP_STM_TIMER) += timer-nxp-stm.o > + > diff --git a/drivers/clocksource/timer-nxp-stm.c b/drivers/clocksource/timer-nxp-stm.c > new file mode 100644 > index 000000000000..b67e438487ae > --- /dev/null > +++ b/drivers/clocksource/timer-nxp-stm.c > @@ -0,0 +1,524 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright 2016 Freescale Semiconductor, Inc. > + * Copyright 2018,2021-2025 NXP > + * > + * NXP System Timer Module: > + * > + * STM supports commonly required system and application software > + * timing functions. STM includes a 32-bit count-up timer and four > + * 32-bit compare channels with a separate interrupt source for each > + * channel. The timer is driven by the STM module clock divided by an > + * 8-bit prescale value (1 to 256). It has ability to stop the timer > + * in Debug mode > + * > + */ > +#include <linux/clk.h> > +#include <linux/clockchips.h> > +#include <linux/cpuhotplug.h> > +#include <linux/interrupt.h> > +#include <linux/module.h> > +#include <linux/of_address.h> Not needed. > +#include <linux/of_irq.h> > +#include <linux/of_device.h> Not needed. 
> +#include <linux/platform_device.h> > +#include <linux/sched_clock.h> > + > +/* > + * Each stm has 4 channels which take 0x10 Bytes register space > + */ > +#define STM_CHANNEL(n) (0x10 * ((n) + 1)) > + > +#define STM_CCR 0x00 > +#define STM_CCR_CEN BIT(0) > + > +#define STM_CIR 0x04 > +#define STM_CIR_CIF BIT(0) > + > +#define STM_CMP 0x08 > + > +#define STM_CR 0x00 > +#define STM_CR_TEN BIT(0) > +#define STM_CR_FRZ BIT(1) > +#define STM_CR_CPS_OFFSET 8u > +#define STM_CR_CPS_MASK GENMASK(15, STM_CR_CPS_OFFSET) > +#define STM_CR_CPS(x) (((x) << STM_CR_CPS_OFFSET) & STM_CR_CPS_MASK) STM_CR_CPS(x) seems to be unused. > + > +#define STM_CNT 0x04 > + > +#define STM_ENABLE_MASK (STM_CR_FRZ | STM_CR_TEN) > + > +struct stm_clocksource { > + struct clocksource cs; > + int counter; > +}; > + > +struct stm_clockevent { > + struct clock_event_device ced; > + unsigned long delta; > +}; > + > +struct stm_timer { > + void __iomem *base; > + unsigned long rate; > + union { > + struct stm_clocksource scs; > + struct stm_clockevent sce; > + }; > +}; > + > +static DEFINE_PER_CPU(struct stm_timer *, stm_timers); > + > +static struct stm_timer *stm_sched_clock; > + > +/** > + * struct stm_instances - a set of counter for the STM initialized > + * > + * @clocksource: an integer giving the number of initialized clocksource > + * @clockevent_per_cpu: an integer giving the number of initialized clockevent per cpu > + * @clockevent_broadcast: an integer giving the number of initialized broadcast clockevent > + * @features: a set of flag telling what kind of timer to initialize > + */ > +struct stm_instances { > + int clocksource; > + int clockevent_per_cpu; > + int clockevent_broadcast; > + int features; 'unsigned int' instead of 'int' since none of these fields are expected to contain negative values? 
> +}; > + > +#define STM_CLKSRC BIT(0) > +#define STM_CLKEVT_PER_CPU BIT(1) > +#define STM_CLKEVT_BROADCAST BIT(2) > + > +static struct stm_clocksource *cs_to_scs(struct clocksource *cs) > +{ > + return container_of(cs, struct stm_clocksource, cs); > +} > + > +static struct stm_clockevent *ced_to_sced(struct clock_event_device *ced) > +{ > + return container_of(ced, struct stm_clockevent, ced); > +} > + > +static struct stm_timer *cs_to_stm(struct clocksource *cs) > +{ > + struct stm_clocksource *scs = cs_to_scs(cs); > + > + return container_of(scs, struct stm_timer, scs); > +} > + > +static struct stm_timer *ced_to_stm(struct clock_event_device *ced) > +{ > + struct stm_clockevent *sce = ced_to_sced(ced); > + > + return container_of(sce, struct stm_timer, sce); > +} > + > +static u64 notrace nxp_stm_read_sched_clock(void) > +{ > + return readl(stm_sched_clock->base + STM_CNT); > +} > + > +static u32 nxp_stm_clocksource_getcnt(struct stm_timer *stm_timer) > +{ > + return readl(stm_timer->base + STM_CNT); > +} > + > +static void nxp_stm_clocksource_setcnt(struct stm_timer *stm_timer, u32 cnt) > +{ > + writel(cnt, stm_timer->base + STM_CNT); > +} > + > +static u64 nxp_stm_clocksource_read(struct clocksource *cs) > +{ > + struct stm_timer *stm_timer = cs_to_stm(cs); > + > + return (u64)nxp_stm_clocksource_getcnt(stm_timer); > +} > + > +static int nxp_stm_clocksource_enable(struct clocksource *cs) > +{ > + struct stm_timer *stm_timer = cs_to_stm(cs); > + u32 reg; > + > + reg = readl(stm_timer->base + STM_CR); > + reg &= ~STM_CR_CPS_MASK; > + reg |= STM_ENABLE_MASK; > + > + writel(reg, stm_timer->base + STM_CR); > + > + return 0; > +} > + > +static void nxp_stm_clocksource_disable(struct clocksource *cs) > +{ > + struct stm_timer *stm_timer = cs_to_stm(cs); > + u32 reg; > + > + reg = readl(stm_timer->base + STM_CR); > + reg &= ~(STM_CR_CPS_MASK | STM_ENABLE_MASK); > + > + writel(reg, stm_timer->base + STM_CR); > +} > + > +static void nxp_stm_clocksource_suspend(struct 
clocksource *cs) > +{ > + struct stm_timer *stm_timer = cs_to_stm(cs); > + > + nxp_stm_clocksource_disable(cs); > + stm_timer->scs.counter = nxp_stm_clocksource_getcnt(stm_timer); > +} > + > +static void nxp_stm_clocksource_resume(struct clocksource *cs) > +{ > + struct stm_timer *stm_timer = cs_to_stm(cs); > + > + nxp_stm_clocksource_setcnt(stm_timer, stm_timer->scs.counter); > + nxp_stm_clocksource_enable(cs); > +} > + > +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, > + void __iomem *base, struct clk *clk) > +{ > + struct stm_timer *stm_timer; > + int ret; > + > + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); > + if (!stm_timer) > + return -ENOMEM; > + > + stm_timer->base = base; > + stm_timer->rate = clk_get_rate(clk); > + > + stm_timer->scs.cs.name = name; > + stm_timer->scs.cs.rating = 460; > + stm_timer->scs.cs.read = nxp_stm_clocksource_read; > + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; > + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; > + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; > + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; > + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); > + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; > + > + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); > + if (ret) > + return ret; clocksource_unregister during remove callback for cleanup? > + > + stm_sched_clock = stm_timer; > + > + sched_clock_register(nxp_stm_read_sched_clock, 32, stm_timer->rate); > + > + dev_set_drvdata(dev, stm_timer); Is this used? > + > + dev_dbg(dev, "Registered clocksource %s\n", name); > + > + return 0; > +} > + > +static int nxp_stm_clockevent_read_counter(struct stm_timer *stm_timer) > +{ > + return readl(stm_timer->base + STM_CNT); > +} > + > +static void nxp_stm_clockevent_disable(struct stm_timer *stm_timer) > +{ > + /* > + * The counter is shared between channels and will continue to > + * be incremented. 
If STM_CMP value is too small, the next event can > + * be lost if we don't disable the entire module. > + * Disabling the entire module makes STM not suitable as clocksource. > + */ > + writel(0, stm_timer->base + STM_CR); > + writel(0, stm_timer->base + STM_CHANNEL(0) + STM_CCR); > +} > + > +static void nxp_stm_clockevent_enable(struct stm_timer *stm_timer) > +{ > + u32 reg = readl(stm_timer->base + STM_CR); > + > + reg &= ~STM_CR_CPS_MASK; > + reg |= STM_ENABLE_MASK; > + > + writel(reg, stm_timer->base + STM_CR); > + writel(STM_CCR_CEN, stm_timer->base + STM_CHANNEL(0) + STM_CCR); > +} > + > +static void nxp_stm_clockevent_irq_clr(struct stm_timer *stm_timer) > +{ > + /* Clear the interrupt */ > + writel(STM_CIR_CIF, stm_timer->base + STM_CHANNEL(0) + STM_CIR); > +} > + > +static void nxp_stm_clockevent_irq_ack(struct stm_timer *stm_timer) > +{ > + u32 val; > + > + nxp_stm_clockevent_irq_clr(stm_timer); > + > + /* > + * Update STM_CMP value using the counter value > + */ > + val = nxp_stm_clockevent_read_counter(stm_timer) + stm_timer->sce.delta; > + > + writel(val, stm_timer->base + STM_CHANNEL(0) + STM_CMP); > +} > + > +static irqreturn_t nxp_stm_clockevent_interrupt(int irq, void *dev_id) > +{ > + struct clock_event_device *ced = dev_id; > + struct stm_timer *stm_timer = ced_to_stm(ced); > + > + nxp_stm_clockevent_irq_ack(stm_timer); > + > + /* > + * stm hardware doesn't support oneshot, it will generate an > + * interrupt and start the counter again so software needs to > + * disable the timer to stop the counter loop in ONESHOT mode. 
> + */ > + if (likely(clockevent_state_oneshot(ced))) > + nxp_stm_clockevent_disable(stm_timer); > + > + ced->event_handler(ced); > + > + return IRQ_HANDLED; > +} > + > +static int nxp_stm_clockevent_shutdown(struct clock_event_device *ced) > +{ > + struct stm_timer *stm_timer = ced_to_stm(ced); > + > + nxp_stm_clockevent_disable(stm_timer); > + > + return 0; > +} > + > +static int nxp_stm_clockevent_set_next_event(unsigned long delta, struct clock_event_device *ced) > +{ > + struct stm_timer *stm_timer = ced_to_stm(ced); > + u32 val; > + > + nxp_stm_clockevent_disable(stm_timer); While examining the code base, I came across the drivers/clocksource/timer-imx-gpt.c file, specifically the mx1_2_set_next_event function, which includes a protection against missing events. Using a similar approach would allow us to keep the STM module enabled while only altering the channel's register state. This risk can also be mitigated by adjusting min_delta_ns based on tick frequency. > + > + stm_timer->sce.delta = delta; > + > + val = nxp_stm_clockevent_read_counter(stm_timer) + delta; > + > + writel(val, stm_timer->base + STM_CHANNEL(0) + STM_CMP); > + > + nxp_stm_clockevent_enable(stm_timer); > + > + return 0; > +} > + > +static int nxp_stm_clockevent_set_periodic(struct clock_event_device *ced) > +{ > + struct stm_timer *stm_timer = ced_to_stm(ced); > + > + return nxp_stm_clockevent_set_next_event(stm_timer->rate, ced); > +} > + > +static int __init nxp_stm_clockevent_broadcast_init(struct device *dev, const char *name, void __iomem *base, > + int irq, struct clk *clk) > +{ > + struct stm_timer *stm_timer; > + int ret; > + > + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); > + if (!stm_timer) > + return -ENOMEM; > + > + stm_timer->base = base; > + stm_timer->rate = clk_get_rate(clk); > + > + stm_timer->sce.ced.name = name; > + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; > + stm_timer->sce.ced.set_state_shutdown = 
nxp_stm_clockevent_shutdown; > + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; > + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; > + stm_timer->sce.ced.cpumask = cpu_possible_mask; > + stm_timer->sce.ced.rating = 460; > + stm_timer->sce.ced.irq = irq; > + > + nxp_stm_clockevent_irq_clr(stm_timer); > + > + ret = request_irq(irq, nxp_stm_clockevent_interrupt, > + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); > + if (ret) { > + dev_err(dev, "Unable to allocate interrupt line: %d\n", ret); > + return ret; > + } > + > + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); > + > + dev_dbg(dev, "Registered broadcast clockevent %s irq=%d\n", name, irq); > + > + return 0; > +} > + > +static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, const char *name, void __iomem *base, > + int irq, struct clk *clk, int cpu) > +{ This function duplicates a significant portion of the previous one. To avoid code duplication, it would be beneficial to extract the common part into a dedicated function. 
> + struct stm_timer *stm_timer; > + int ret; > + > + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); > + if (!stm_timer) > + return -ENOMEM; > + > + stm_timer->base = base; > + stm_timer->rate = clk_get_rate(clk); > + > + stm_timer->sce.ced.name = name; > + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; > + stm_timer->sce.ced.set_state_shutdown = nxp_stm_clockevent_shutdown; > + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; > + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; > + stm_timer->sce.ced.cpumask = cpumask_of(cpu); > + stm_timer->sce.ced.rating = 460; > + stm_timer->sce.ced.irq = irq; > + > + nxp_stm_clockevent_irq_clr(stm_timer); > + > + ret = request_irq(irq, nxp_stm_clockevent_interrupt, > + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); devm_request_irq instead ? > + if (ret) { > + dev_err(dev, "Unable to allocate interrupt line: %d\n", ret); > + return ret; > + } > + > + per_cpu(stm_timers, cpu) = stm_timer; > + > + dev_dbg(dev, "Initialized per cpu clockevent name=%s, irq=%d, cpu=%d\n", name, irq, cpu); > + > + return 0; > +} > + > +static int nxp_stm_clockevent_starting_cpu(unsigned int cpu) > +{ > + struct stm_timer *stm_timer = per_cpu(stm_timers, cpu); > + int ret; > + > + if (WARN_ON(!stm_timer)) > + return -EFAULT; > + > + ret = irq_force_affinity(stm_timer->sce.ced.irq, cpumask_of(cpu)); > + if (ret) > + return ret; > + > + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); > + > + return 0; > +} > + > +static int __init nxp_stm_timer_probe(struct platform_device *pdev) > +{ > + struct device *dev = &pdev->dev; > + struct device_node *np = dev->of_node; > + struct stm_instances *stm_instances; > + const char *name = of_node_full_name(np); > + void __iomem *base; > + int irq, ret; > + struct clk *clk; > + > + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); > + if (!stm_instances) 
{ > + dev_err(dev, "No STM instances associated with a cpu"); > + return -EINVAL; > + } > + > + base = devm_of_iomap(dev, np, 0, NULL); > + if (IS_ERR(base)) { > + dev_err(dev, "Failed to iomap %pOFn\n", np); > + return PTR_ERR(base); > + } > + > + irq = irq_of_parse_and_map(np, 0); > + if (irq <= 0) { > + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); > + return -EINVAL; > + } From commit description: > The first probed STM is used as a clocksource, the second will be the > broadcast timer and the rest are used as a clockevent with the > affinity set to a CPU. Why is the interrupt mandatory when the node is probed as a clocksource? > + > + clk = devm_clk_get(dev, NULL); > + if (IS_ERR(clk)) { > + dev_err(dev, "Clock not found\n"); Missing irq_dispose_mapping ? > + return PTR_ERR(clk); > + } > + > + ret = clk_prepare_enable(clk); > + if (ret) { > + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); > + return ret; > + } devm_clk_get_enabled instead of devm_clk_get + clk_prepare_enable ? 
> + > + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { > + > + /* > + * First probed STM will be a clocksource > + */ > + ret = nxp_stm_clocksource_init(dev, name, base, clk); > + if (ret) > + return ret; > + stm_instances->clocksource++; > + > + } else if (!stm_instances->clockevent_broadcast && > + (stm_instances->features & STM_CLKEVT_BROADCAST)) { > + > + /* > + * Second probed STM will be a broadcast clockevent > + */ > + ret = nxp_stm_clockevent_broadcast_init(dev, name, base, irq, clk); > + if (ret) > + return ret; > + stm_instances->clockevent_broadcast++; > + > + } else if (stm_instances->clockevent_per_cpu < num_possible_cpus() && > + (stm_instances->features & STM_CLKEVT_PER_CPU)) { > + > + /* > + * Next probed STM will be a per CPU clockevent, until > + * we probe as much as we have CPUs available on the > + * system, we do a partial initialization > + */ > + ret = nxp_stm_clockevent_per_cpu_init(dev, name, base, irq, clk, > + stm_instances->clockevent_per_cpu); > + if (ret) > + return ret; > + > + stm_instances->clockevent_per_cpu++; > + > + /* > + * The number of probed STM for per CPU clockevent is > + * equal to the number of available CPUs on the > + * system. 
We install the cpu hotplug to finish the > + * initialization by registering the clockevents > + */ > + if (stm_instances->clockevent_per_cpu == num_possible_cpus()) { > + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "STM timer:starting", > + nxp_stm_clockevent_starting_cpu, NULL); > + if (ret < 0) > + return ret; > + } > + } > + > + return 0; > +} > + > +static struct stm_instances s32g_stm_instances = { .features = STM_CLKSRC | STM_CLKEVT_PER_CPU }; > + > +static const struct of_device_id nxp_stm_of_match[] = { > + { .compatible = "nxp,s32g2-stm", &s32g_stm_instances }, > + { } > +}; > +MODULE_DEVICE_TABLE(of, nxp_stm_of_match); > + > +static struct platform_driver nxp_stm_probe = { > + .probe = nxp_stm_timer_probe, > + .driver = { > + .name = "nxp-stm", > + .of_match_table = of_match_ptr(nxp_stm_of_match), > + }, > +}; > +module_platform_driver(nxp_stm_probe); > + > +MODULE_DESCRIPTION("NXP System Timer Module driver"); > +MODULE_LICENSE("GPL");
On 24/03/2025 11:00, Daniel Lezcano wrote: > + > +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, > + void __iomem *base, struct clk *clk) > +{ > + struct stm_timer *stm_timer; > + int ret; > + > + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); > + if (!stm_timer) > + return -ENOMEM; > + > + stm_timer->base = base; > + stm_timer->rate = clk_get_rate(clk); > + > + stm_timer->scs.cs.name = name; You are aware that all node names will have exactly the same name? All of them will be called "timer"? > + stm_timer->scs.cs.rating = 460; > + stm_timer->scs.cs.read = nxp_stm_clocksource_read; > + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; > + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; > + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; > + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; > + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); > + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; > + > + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); > + if (ret) > + return ret; > + > + stm_sched_clock = stm_timer; > + > + sched_clock_register(nxp_stm_read_sched_clock, 32, stm_timer->rate); > + > + dev_set_drvdata(dev, stm_timer); > + > + dev_dbg(dev, "Registered clocksource %s\n", name); Since all names will be the same, this makes little sense in debugging. I guess you wanted one of the OF printk-formats for full node name. > + > + return 0; > +} > + > +static int nxp_stm_clockevent_read_counter(struct stm_timer *stm_timer) > +{ > + return readl(stm_timer->base + STM_CNT); > +} > + ... 
> + > +static int __init nxp_stm_timer_probe(struct platform_device *pdev) > +{ > + struct device *dev = &pdev->dev; > + struct device_node *np = dev->of_node; > + struct stm_instances *stm_instances; > + const char *name = of_node_full_name(np); > + void __iomem *base; > + int irq, ret; > + struct clk *clk; > + > + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); No, you *cannot* drop the const. It's there on purpose. Match data should be const because it defines per variant differences. That's why the prototype of of_device_get_match_data() has such return type. You just want some global singleton, not match data. > + if (!stm_instances) { > + dev_err(dev, "No STM instances associated with a cpu"); > + return -EINVAL; > + } > + > + base = devm_of_iomap(dev, np, 0, NULL); > + if (IS_ERR(base)) { > + dev_err(dev, "Failed to iomap %pOFn\n", np); You need to clean up the downstream code to match upstream. All of these should be return dev_err_probe(). > + return PTR_ERR(base); > + } > + > + irq = irq_of_parse_and_map(np, 0); > + if (irq <= 0) { > + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); > + return -EINVAL; > + } > + > + clk = devm_clk_get(dev, NULL); > + if (IS_ERR(clk)) { > + dev_err(dev, "Clock not found\n"); And this is clearly incorrect - spamming logs. Syntax is: return dev_err_probe > + return PTR_ERR(clk); > + } > + > + ret = clk_prepare_enable(clk); Drop, devm_clk_get_enabled. > + if (ret) { > + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); > + return ret; > + } > + > + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { > + > + /* > + * First probed STM will be a clocksource > + */ > + ret = nxp_stm_clocksource_init(dev, name, base, clk); > + if (ret) > + return ret; > + stm_instances->clocksource++; That's racy. Devices can be brought async, ideally. This should be rather idr or probably entire structure protected with a mutex. 
> + > + } else if (!stm_instances->clockevent_broadcast && > + (stm_instances->features & STM_CLKEVT_BROADCAST)) { > + > + /* > + * Second probed STM will be a broadcast clockevent > + */ > + ret = nxp_stm_clockevent_broadcast_init(dev, name, base, irq, clk); > + if (ret) > + return ret; > + stm_instances->clockevent_broadcast++; > + > + } else if (stm_instances->clockevent_per_cpu < num_possible_cpus() && > + (stm_instances->features & STM_CLKEVT_PER_CPU)) { > + > + /* > + * Next probed STM will be a per CPU clockevent, until > + * we probe as much as we have CPUs available on the > + * system, we do a partial initialization > + */ > + ret = nxp_stm_clockevent_per_cpu_init(dev, name, base, irq, clk, > + stm_instances->clockevent_per_cpu); > + if (ret) > + return ret; > + > + stm_instances->clockevent_per_cpu++; > + > + /* > + * The number of probed STM for per CPU clockevent is > + * equal to the number of available CPUs on the > + * system. We install the cpu hotplug to finish the > + * initialization by registering the clockevents > + */ > + if (stm_instances->clockevent_per_cpu == num_possible_cpus()) { > + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "STM timer:starting", > + nxp_stm_clockevent_starting_cpu, NULL); > + if (ret < 0) > + return ret; > + } > + } > + > + return 0; > +} > + > +static struct stm_instances s32g_stm_instances = { .features = STM_CLKSRC | STM_CLKEVT_PER_CPU }; Missing const. Or misplaced - all file-scope static variables are declared at the top. > + > +static const struct of_device_id nxp_stm_of_match[] = { > + { .compatible = "nxp,s32g2-stm", &s32g_stm_instances }, > + { } > +}; > +MODULE_DEVICE_TABLE(of, nxp_stm_of_match); > + > +static struct platform_driver nxp_stm_probe = { > + .probe = nxp_stm_timer_probe, > + .driver = { > + .name = "nxp-stm", > + .of_match_table = of_match_ptr(nxp_stm_of_match), Drop of_match_ptr, you have here warnings. 
> + }, > +}; > +module_platform_driver(nxp_stm_probe); > + > +MODULE_DESCRIPTION("NXP System Timer Module driver"); > +MODULE_LICENSE("GPL"); Best regards, Krzysztof
Hi Ghennadi, thanks for the review On 25/03/2025 08:28, Ghennadi Procopciuc wrote: [ ... ] >> +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, >> + void __iomem *base, struct clk *clk) >> +{ >> + struct stm_timer *stm_timer; >> + int ret; >> + >> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >> + if (!stm_timer) >> + return -ENOMEM; >> + >> + stm_timer->base = base; >> + stm_timer->rate = clk_get_rate(clk); >> + >> + stm_timer->scs.cs.name = name; >> + stm_timer->scs.cs.rating = 460; >> + stm_timer->scs.cs.read = nxp_stm_clocksource_read; >> + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; >> + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; >> + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; >> + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; >> + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); >> + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; >> + >> + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); >> + if (ret) >> + return ret; > > clocksource_unregister during remove callback for cleanup? Sorry I don't get it :/ There is no cleanup after the clocksource_register_hz() is successful >> + >> + stm_sched_clock = stm_timer; >> + >> + sched_clock_register(nxp_stm_read_sched_clock, 32, stm_timer->rate); >> + >> + dev_set_drvdata(dev, stm_timer); > > Is this used? Nope, I'll remove it. >> + >> + dev_dbg(dev, "Registered clocksource %s\n", name); >> + >> + return 0; >> +} [ ... ] >> +static int nxp_stm_clockevent_set_next_event(unsigned long delta, struct clock_event_device *ced) >> +{ >> + struct stm_timer *stm_timer = ced_to_stm(ced); >> + u32 val; >> + >> + nxp_stm_clockevent_disable(stm_timer); > > While examining the code base, I came across the > drivers/clocksource/timer-imx-gpt.c file, specifically the > mx1_2_set_next_event function, which includes a protection against > missing events. 
Using a similar approach would allow us to keep the STM > module enabled while only altering the channel's register state. This > risk can also be mitigated by adjusting min_delta_ns based on tick > frequency. How would you validate that ? >> + stm_timer->sce.delta = delta; >> + >> + val = nxp_stm_clockevent_read_counter(stm_timer) + delta; >> + >> + writel(val, stm_timer->base + STM_CHANNEL(0) + STM_CMP); >> + >> + nxp_stm_clockevent_enable(stm_timer); >> + >> + return 0; >> +} >> + >> +static int nxp_stm_clockevent_set_periodic(struct clock_event_device *ced) >> +{ >> + struct stm_timer *stm_timer = ced_to_stm(ced); >> + >> + return nxp_stm_clockevent_set_next_event(stm_timer->rate, ced); >> +} >> + >> +static int __init nxp_stm_clockevent_broadcast_init(struct device *dev, const char *name, void __iomem *base, >> + int irq, struct clk *clk) >> +{ >> + struct stm_timer *stm_timer; >> + int ret; >> + >> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >> + if (!stm_timer) >> + return -ENOMEM; >> + >> + stm_timer->base = base; >> + stm_timer->rate = clk_get_rate(clk); >> + >> + stm_timer->sce.ced.name = name; >> + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; >> + stm_timer->sce.ced.set_state_shutdown = nxp_stm_clockevent_shutdown; >> + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; >> + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; >> + stm_timer->sce.ced.cpumask = cpu_possible_mask; >> + stm_timer->sce.ced.rating = 460; >> + stm_timer->sce.ced.irq = irq; >> + >> + nxp_stm_clockevent_irq_clr(stm_timer); >> + >> + ret = request_irq(irq, nxp_stm_clockevent_interrupt, >> + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); >> + if (ret) { >> + dev_err(dev, "Unable to allocate interrupt line: %d\n", ret); >> + return ret; >> + } >> + >> + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); >> + >> + dev_dbg(dev, "Registered 
broadcast clockevent %s irq=%d\n", name, irq); >> + >> + return 0; >> +} >> + >> +static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, const char *name, void __iomem *base, >> + int irq, struct clk *clk, int cpu) >> +{ > > This function duplicates a significant portion of the previous one. To > avoid code duplication, it would be beneficial to extract the common > part into a dedicated function. Sure >> + struct stm_timer *stm_timer; >> + int ret; >> + >> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >> + if (!stm_timer) >> + return -ENOMEM; >> + >> + stm_timer->base = base; >> + stm_timer->rate = clk_get_rate(clk); >> + >> + stm_timer->sce.ced.name = name; >> + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; >> + stm_timer->sce.ced.set_state_shutdown = nxp_stm_clockevent_shutdown; >> + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; >> + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; >> + stm_timer->sce.ced.cpumask = cpumask_of(cpu); >> + stm_timer->sce.ced.rating = 460; >> + stm_timer->sce.ced.irq = irq; >> + >> + nxp_stm_clockevent_irq_clr(stm_timer); >> + >> + ret = request_irq(irq, nxp_stm_clockevent_interrupt, >> + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); > > devm_request_irq instead ? 
Yes >> + if (ret) { >> + dev_err(dev, "Unable to allocate interrupt line: %d\n", ret); >> + return ret; >> + } >> + >> + per_cpu(stm_timers, cpu) = stm_timer; >> + >> + dev_dbg(dev, "Initialized per cpu clockevent name=%s, irq=%d, cpu=%d\n", name, irq, cpu); >> + >> + return 0; >> +} >> + >> +static int nxp_stm_clockevent_starting_cpu(unsigned int cpu) >> +{ >> + struct stm_timer *stm_timer = per_cpu(stm_timers, cpu); >> + int ret; >> + >> + if (WARN_ON(!stm_timer)) >> + return -EFAULT; >> + >> + ret = irq_force_affinity(stm_timer->sce.ced.irq, cpumask_of(cpu)); >> + if (ret) >> + return ret; >> + >> + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); >> + >> + return 0; >> +} >> + >> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >> +{ >> + struct device *dev = &pdev->dev; >> + struct device_node *np = dev->of_node; >> + struct stm_instances *stm_instances; >> + const char *name = of_node_full_name(np); >> + void __iomem *base; >> + int irq, ret; >> + struct clk *clk; >> + >> + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); >> + if (!stm_instances) { >> + dev_err(dev, "No STM instances associated with a cpu"); >> + return -EINVAL; >> + } >> + >> + base = devm_of_iomap(dev, np, 0, NULL); >> + if (IS_ERR(base)) { >> + dev_err(dev, "Failed to iomap %pOFn\n", np); >> + return PTR_ERR(base); >> + } >> + >> + irq = irq_of_parse_and_map(np, 0); >> + if (irq <= 0) { >> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >> + return -EINVAL; >> + } > > From commit description: > >> The first probed STM is used as a clocksource, the second will be the >> broadcast timer and the rest are used as a clockevent with the >> affinity set to a CPU. > > Why is the interrupt mandatory when the node is probed as a clocksource? 
It relies on the DT description and it does not hurt to have a consistent code path for clockevent / clocksource even if the interrupt is not requested for the clocksource later. >> + >> + clk = devm_clk_get(dev, NULL); >> + if (IS_ERR(clk)) { >> + dev_err(dev, "Clock not found\n"); > > Missing irq_dispose_mapping ? Yeah ... >> + return PTR_ERR(clk); >> + } >> + >> + ret = clk_prepare_enable(clk); >> + if (ret) { >> + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); >> + return ret; >> + } > > devm_clk_get_enabled instead of devm_clk_get + clk_prepare_enable ? Yes, good point >> + >> + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { >> + >> + /* >> + * First probed STM will be a clocksource >> + */ >> + ret = nxp_stm_clocksource_init(dev, name, base, clk); >> + if (ret) >> + return ret; >> + stm_instances->clocksource++; >> + >> + } else if (!stm_instances->clockevent_broadcast && >> + (stm_instances->features & STM_CLKEVT_BROADCAST)) { >> + >> + /* >> + * Second probed STM will be a broadcast clockevent >> + */ >> + ret = nxp_stm_clockevent_broadcast_init(dev, name, base, irq, clk); >> + if (ret) >> + return ret; >> + stm_instances->clockevent_broadcast++; >> + >> + } else if (stm_instances->clockevent_per_cpu < num_possible_cpus() && >> + (stm_instances->features & STM_CLKEVT_PER_CPU)) { >> + >> + /* >> + * Next probed STM will be a per CPU clockevent, until >> + * we probe as much as we have CPUs available on the >> + * system, we do a partial initialization >> + */ >> + ret = nxp_stm_clockevent_per_cpu_init(dev, name, base, irq, clk, >> + stm_instances->clockevent_per_cpu); >> + if (ret) >> + return ret; >> + >> + stm_instances->clockevent_per_cpu++; >> + >> + /* >> + * The number of probed STM for per CPU clockevent is >> + * equal to the number of available CPUs on the >> + * system. 
We install the cpu hotplug to finish the >> + * initialization by registering the clockevents >> + */ >> + if (stm_instances->clockevent_per_cpu == num_possible_cpus()) { >> + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "STM timer:starting", >> + nxp_stm_clockevent_starting_cpu, NULL); >> + if (ret < 0) >> + return ret; >> + } >> + } >> + >> + return 0; >> +} >> + >> +static struct stm_instances s32g_stm_instances = { .features = STM_CLKSRC | STM_CLKEVT_PER_CPU }; >> + >> +static const struct of_device_id nxp_stm_of_match[] = { >> + { .compatible = "nxp,s32g2-stm", &s32g_stm_instances }, >> + { } >> +}; >> +MODULE_DEVICE_TABLE(of, nxp_stm_of_match); >> + >> +static struct platform_driver nxp_stm_probe = { >> + .probe = nxp_stm_timer_probe, >> + .driver = { >> + .name = "nxp-stm", >> + .of_match_table = of_match_ptr(nxp_stm_of_match), >> + }, >> +}; >> +module_platform_driver(nxp_stm_probe); >> + >> +MODULE_DESCRIPTION("NXP System Timer Module driver"); >> +MODULE_LICENSE("GPL"); >
On 3/25/2025 12:53 PM, Daniel Lezcano wrote: [ ... ] >>> +static int __init nxp_stm_clocksource_init(struct device *dev, const >>> char *name, >>> + void __iomem *base, struct clk *clk) >>> +{ >>> + struct stm_timer *stm_timer; >>> + int ret; >>> + >>> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >>> + if (!stm_timer) >>> + return -ENOMEM; >>> + >>> + stm_timer->base = base; >>> + stm_timer->rate = clk_get_rate(clk); >>> + >>> + stm_timer->scs.cs.name = name; >>> + stm_timer->scs.cs.rating = 460; >>> + stm_timer->scs.cs.read = nxp_stm_clocksource_read; >>> + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; >>> + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; >>> + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; >>> + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; >>> + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); >>> + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; >>> + >>> + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); >>> + if (ret) >>> + return ret; >> >> clocksource_unregister during remove callback for cleanup? > > Sorry I don't get it :/ > > There is no cleanup after the clocksource_register_hz() is successful > I noticed that other drivers, such as drivers/clocksource/timer-tegra186.c and drivers/clocksource/timer-sun5i.c, perform clocksource_unregister during their platform_driver.remove callback. Shouldn't this apply here as well? [ ... ] > >>> +static int nxp_stm_clockevent_set_next_event(unsigned long delta, >>> struct clock_event_device *ced) >>> +{ >>> + struct stm_timer *stm_timer = ced_to_stm(ced); >>> + u32 val; >>> + >>> + nxp_stm_clockevent_disable(stm_timer); >> >> While examining the code base, I came across the >> drivers/clocksource/timer-imx-gpt.c file, specifically the >> mx1_2_set_next_event function, which includes a protection against >> missing events. 
Using a similar approach would allow us to keep the STM >> module enabled while only altering the channel's register state. This >> risk can also be mitigated by adjusting min_delta_ns based on tick >> frequency. > > How would you validate that ? > How would I validate that this works? If this is the question, I see that the core performs an auto adjustment of the minimum delta as part of the clockevents_program_min_delta function when CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST is enabled. Initially, I would examine how many times dev->set_next_event() returns -ETIME. At the end of the function, min_delta_ns should reflect the actual minimum delta the device can handle. [ ... ] >>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>> +{ >>> + struct device *dev = &pdev->dev; >>> + struct device_node *np = dev->of_node; >>> + struct stm_instances *stm_instances; >>> + const char *name = of_node_full_name(np); >>> + void __iomem *base; >>> + int irq, ret; >>> + struct clk *clk; >>> + >>> + stm_instances = >>> (typeof(stm_instances))of_device_get_match_data(dev); >>> + if (!stm_instances) { >>> + dev_err(dev, "No STM instances associated with a cpu"); >>> + return -EINVAL; >>> + } >>> + >>> + base = devm_of_iomap(dev, np, 0, NULL); >>> + if (IS_ERR(base)) { >>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>> + return PTR_ERR(base); >>> + } >>> + >>> + irq = irq_of_parse_and_map(np, 0); >>> + if (irq <= 0) { >>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>> + return -EINVAL; >>> + } >> >> From commit description: >> >>> The first probed STM is used as a clocksource, the second will be the >>> broadcast timer and the rest are used as a clockevent with the >>> affinity set to a CPU. >> >> Why is the interrupt mandatory when the node is probed as a clocksource? 
> > It relies on the DT description and it does not hurt to have a > consistent code path for clockevent / clocksource even if the interrupt > is not requested for the clocksource later. > If so, in my opinion, it would make sense to use the same STM instance for both the clocksource and broadcast clockevent, as both functions can be accommodated within a single STM instance, which will help reduce the number of STM instances used.
On 25/03/2025 12:40, Ghennadi Procopciuc wrote: > On 3/25/2025 12:53 PM, Daniel Lezcano wrote: > [ ... ] >>>> +static int __init nxp_stm_clocksource_init(struct device *dev, const >>>> char *name, >>>> + void __iomem *base, struct clk *clk) >>>> +{ >>>> + struct stm_timer *stm_timer; >>>> + int ret; >>>> + >>>> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >>>> + if (!stm_timer) >>>> + return -ENOMEM; >>>> + >>>> + stm_timer->base = base; >>>> + stm_timer->rate = clk_get_rate(clk); >>>> + >>>> + stm_timer->scs.cs.name = name; >>>> + stm_timer->scs.cs.rating = 460; >>>> + stm_timer->scs.cs.read = nxp_stm_clocksource_read; >>>> + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; >>>> + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; >>>> + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; >>>> + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; >>>> + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); >>>> + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; >>>> + >>>> + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); >>>> + if (ret) >>>> + return ret; >>> >>> clocksource_unregister during remove callback for cleanup? >> >> Sorry I don't get it :/ >> >> There is no cleanup after the clocksource_register_hz() is successful >> > > I noticed that other drivers, such as > drivers/clocksource/timer-tegra186.c and > drivers/clocksource/timer-sun5i.c, perform clocksource_unregister during > their platform_driver.remove callback. Shouldn't this apply here as well? The tegra186 registers with one probe several timers and clocksources. The timer-nxp probes for each node. The timer-sun5i.c has the remove callback but it is pointless as it can not be compiled as module. So it should not have it. > [ ... 
] >> >>>> +static int nxp_stm_clockevent_set_next_event(unsigned long delta, >>>> struct clock_event_device *ced) >>>> +{ >>>> + struct stm_timer *stm_timer = ced_to_stm(ced); >>>> + u32 val; >>>> + >>>> + nxp_stm_clockevent_disable(stm_timer); >>> >>> While examining the code base, I came across the >>> drivers/clocksource/timer-imx-gpt.c file, specifically the >>> mx1_2_set_next_event function, which includes a protection against >>> missing events. Using a similar approach would allow us to keep the STM >>> module enabled while only altering the channel's register state. This >>> risk can also be mitigated by adjusting min_delta_ns based on tick >>> frequency. >> >> How would you validate that ? >> > > How would I validate that this works? > > If this is the question, I see that the core performs an auto adjustment > of the minimum delta as part of the clockevents_program_min_delta > function when CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST is enabled. > Initially, I would examine how many times dev->set_next_event() returns > -ETIME. At the end of the function, min_delta_ns should reflect the > actual minimum delta the device can handle. That is an area of optimization and I would prefer to keep that as a separate change focused on validating this approach. > [ ... 
] >>>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>>> +{ >>>> + struct device *dev = &pdev->dev; >>>> + struct device_node *np = dev->of_node; >>>> + struct stm_instances *stm_instances; >>>> + const char *name = of_node_full_name(np); >>>> + void __iomem *base; >>>> + int irq, ret; >>>> + struct clk *clk; >>>> + >>>> + stm_instances = >>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>> + if (!stm_instances) { >>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>> + return -EINVAL; >>>> + } >>>> + >>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>> + if (IS_ERR(base)) { >>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>> + return PTR_ERR(base); >>>> + } >>>> + >>>> + irq = irq_of_parse_and_map(np, 0); >>>> + if (irq <= 0) { >>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>> + return -EINVAL; >>>> + } >>> >>> From commit description: >>> >>>> The first probed STM is used as a clocksource, the second will be the >>>> broadcast timer and the rest are used as a clockevent with the >>>> affinity set to a CPU. >>> >>> Why is the interrupt mandatory when the node is probed as a clocksource? >> >> It relies on the DT description and it does not hurt to have a >> consistent code path for clockevent / clocksource even if the interrupt >> is not requested for the clocksource later. >> > > If so, in my opinion, it would make sense to use the same STM instance > for both the clocksource and broadcast clockevent, as both functions can > be accommodated within a single STM instance, which will help reduce the > number of STM instances used. The broadcast timer is stopped when it is unused which is the case for the s32 because there are no idle states powering down the local timers. They have to be separated.
On 3/25/2025 2:09 PM, Daniel Lezcano wrote: > On 25/03/2025 12:40, Ghennadi Procopciuc wrote: >> On 3/25/2025 12:53 PM, Daniel Lezcano wrote: >> [ ... ] >>>>> +static int __init nxp_stm_clocksource_init(struct device *dev, const >>>>> char *name, >>>>> + void __iomem *base, struct clk *clk) >>>>> +{ >>>>> + struct stm_timer *stm_timer; >>>>> + int ret; >>>>> + >>>>> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >>>>> + if (!stm_timer) >>>>> + return -ENOMEM; >>>>> + >>>>> + stm_timer->base = base; >>>>> + stm_timer->rate = clk_get_rate(clk); >>>>> + >>>>> + stm_timer->scs.cs.name = name; >>>>> + stm_timer->scs.cs.rating = 460; >>>>> + stm_timer->scs.cs.read = nxp_stm_clocksource_read; >>>>> + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; >>>>> + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; >>>>> + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; >>>>> + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; >>>>> + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); >>>>> + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; >>>>> + >>>>> + ret = clocksource_register_hz(&stm_timer->scs.cs, >>>>> stm_timer->rate); >>>>> + if (ret) >>>>> + return ret; >>>> >>>> clocksource_unregister during remove callback for cleanup? >>> >>> Sorry I don't get it :/ >>> >>> There is no cleanup after the clocksource_register_hz() is successful >>> >> >> I noticed that other drivers, such as >> drivers/clocksource/timer-tegra186.c and >> drivers/clocksource/timer-sun5i.c, perform clocksource_unregister during >> their platform_driver.remove callback. Shouldn't this apply here as well? > > The tegra186 registers with one probe several timers and clocksources. > The timer-nxp probes for each node. > > The timer-sun5i.c has the remove callback but it is pointless as it can > not be compiled as module. So it should not have it. > Ok. >> [ ... 
] >>> >>>>> +static int nxp_stm_clockevent_set_next_event(unsigned long delta, >>>>> struct clock_event_device *ced) >>>>> +{ >>>>> + struct stm_timer *stm_timer = ced_to_stm(ced); >>>>> + u32 val; >>>>> + >>>>> + nxp_stm_clockevent_disable(stm_timer); >>>> >>>> While examining the code base, I came across the >>>> drivers/clocksource/timer-imx-gpt.c file, specifically the >>>> mx1_2_set_next_event function, which includes a protection against >>>> missing events. Using a similar approach would allow us to keep the STM >>>> module enabled while only altering the channel's register state. This >>>> risk can also be mitigated by adjusting min_delta_ns based on tick >>>> frequency. >>> >>> How would you validate that ? >>> >> >> How would I validate that this works? >> >> If this is the question, I see that the core performs an auto adjustment >> of the minimum delta as part of the clockevents_program_min_delta >> function when CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST is enabled. >> Initially, I would examine how many times dev->set_next_event() returns >> -ETIME. At the end of the function, min_delta_ns should reflect the >> actual minimum delta the device can handle. > > That is an area of optimization and I would prefer to keep that as a > separate change focused on validating this approach. > This suggestion supports the argument presented below. >> [ ... 
] >>>>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>>>> +{ >>>>> + struct device *dev = &pdev->dev; >>>>> + struct device_node *np = dev->of_node; >>>>> + struct stm_instances *stm_instances; >>>>> + const char *name = of_node_full_name(np); >>>>> + void __iomem *base; >>>>> + int irq, ret; >>>>> + struct clk *clk; >>>>> + >>>>> + stm_instances = >>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>> + if (!stm_instances) { >>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>> + return -EINVAL; >>>>> + } >>>>> + >>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>> + if (IS_ERR(base)) { >>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>> + return PTR_ERR(base); >>>>> + } >>>>> + >>>>> + irq = irq_of_parse_and_map(np, 0); >>>>> + if (irq <= 0) { >>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>> + return -EINVAL; >>>>> + } >>>> >>>> From commit description: >>>> >>>>> The first probed STM is used as a clocksource, the second will be the >>>>> broadcast timer and the rest are used as a clockevent with the >>>>> affinity set to a CPU. >>>> >>>> Why is the interrupt mandatory when the node is probed as a >>>> clocksource? >>> >>> It relies on the DT description and it does not hurt to have a >>> consistent code path for clockevent / clocksource even if the interrupt >>> is not requested for the clocksource later. >>> >> >> If so, in my opinion, it would make sense to use the same STM instance >> for both the clocksource and broadcast clockevent, as both functions can >> be accommodated within a single STM instance, which will help reduce the >> number of STM instances used. > > The broadcast timer is stopped when it is unused which is the case for > the s32 because there are no idle state powering down the local timers. > They have to have be separated. 
> This wouldn't be the case if the STM is kept running/counting during the clock event setup, with only the clock event interrupt being disabled (CCR.CEN).
[Added s32@ list which I miss from the initial series] On 25/03/2025 08:30, Krzysztof Kozlowski wrote: > On 24/03/2025 11:00, Daniel Lezcano wrote: >> + >> +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, >> + void __iomem *base, struct clk *clk) >> +{ >> + struct stm_timer *stm_timer; >> + int ret; >> + >> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >> + if (!stm_timer) >> + return -ENOMEM; >> + >> + stm_timer->base = base; >> + stm_timer->rate = clk_get_rate(clk); >> + >> + stm_timer->scs.cs.name = name; > > You are aware that all node names will have exactly the same name? All > of them will be called "timer"? From the caller const char *name = of_node_full_name(np); The names are: "clocksource: Switched to clocksource stm@4011c000" Or: 17: 24150 0 0 0 GICv3 57 Level stm@40120000 18: 0 22680 0 0 GICv3 58 Level stm@40124000 19: 0 0 24110 0 GICv3 59 Level stm@40128000 20: 0 0 0 21164 GICv3 60 Level stm@4021c000 And: cat /sys/devices/system/clocksource/clocksource0/current_clocksource stm@4011c000 cat /sys/devices/system/clockevents/clockevent*/current_device stm@40120000 stm@40124000 stm@40128000 stm@4021c000 [ ... ] >> + >> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >> +{ >> + struct device *dev = &pdev->dev; >> + struct device_node *np = dev->of_node; >> + struct stm_instances *stm_instances; >> + const char *name = of_node_full_name(np); >> + void __iomem *base; >> + int irq, ret; >> + struct clk *clk; >> + >> + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); > > No, you *cannot* drop the const. It's there on purpose. Match data > should be const because it defines per variant differences. That's why > the prototype of of_device_get_match_data() has such return type. > You just want some global singleton, not match data. 
> >> + if (!stm_instances) { >> + dev_err(dev, "No STM instances associated with a cpu"); >> + return -EINVAL; >> + } >> + >> + base = devm_of_iomap(dev, np, 0, NULL); >> + if (IS_ERR(base)) { >> + dev_err(dev, "Failed to iomap %pOFn\n", np); > > You need to clean up the downstream code to match upstream. All of these > should be return dev_err_probe(). Oh right, thanks for the reminder >> + return PTR_ERR(base); >> + } >> + >> + irq = irq_of_parse_and_map(np, 0); >> + if (irq <= 0) { >> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >> + return -EINVAL; >> + } >> + >> + clk = devm_clk_get(dev, NULL); >> + if (IS_ERR(clk)) { >> + dev_err(dev, "Clock not found\n"); > > And this is clearly incorrect - spamming logs. Syntax is: > return dev_err_probe > >> + return PTR_ERR(clk); >> + } >> + >> + ret = clk_prepare_enable(clk); > > Drop, devm_clk_get_enabled. > >> + if (ret) { >> + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); >> + return ret; >> + } >> + >> + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { >> + >> + /* >> + * First probed STM will be a clocksource >> + */ >> + ret = nxp_stm_clocksource_init(dev, name, base, clk); >> + if (ret) >> + return ret; >> + stm_instances->clocksource++; > > That's racy. Devices can be brought async, ideally. This should be > rather idr or probably entire structure protected with a mutex. Mmh, interesting. I never had to think about this problem before Do you know at what moment the probing is parallelized ? >> +static struct stm_instances s32g_stm_instances = { .features = STM_CLKSRC | STM_CLKEVT_PER_CPU }; > > Missing const. Or misplaced - all file-scope static variables are > declared at the top. 
> >> + >> +static const struct of_device_id nxp_stm_of_match[] = { >> + { .compatible = "nxp,s32g2-stm", &s32g_stm_instances }, >> + { } >> +}; >> +MODULE_DEVICE_TABLE(of, nxp_stm_of_match); >> + >> +static struct platform_driver nxp_stm_probe = { >> + .probe = nxp_stm_timer_probe, >> + .driver = { >> + .name = "nxp-stm", >> + .of_match_table = of_match_ptr(nxp_stm_of_match), > > Drop of_match_ptr, you have here warnings. > >> + }, >> +}; >> +module_platform_driver(nxp_stm_probe); >> + >> +MODULE_DESCRIPTION("NXP System Timer Module driver"); >> +MODULE_LICENSE("GPL"); Thanks for the review -- Daniel
On 25/03/2025 13:23, Daniel Lezcano wrote: > > [Added s32@ list which I miss from the initial series] > > On 25/03/2025 08:30, Krzysztof Kozlowski wrote: >> On 24/03/2025 11:00, Daniel Lezcano wrote: >>> + >>> +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, >>> + void __iomem *base, struct clk *clk) >>> +{ >>> + struct stm_timer *stm_timer; >>> + int ret; >>> + >>> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >>> + if (!stm_timer) >>> + return -ENOMEM; >>> + >>> + stm_timer->base = base; >>> + stm_timer->rate = clk_get_rate(clk); >>> + >>> + stm_timer->scs.cs.name = name; >> >> You are aware that all node names will have exactly the same name? All >> of them will be called "timer"? > > From the caller const char *name = of_node_full_name(np); Ah, right, it's the full name. > > The names are: > > "clocksource: Switched to clocksource stm@4011c000" > > Or: > > 17: 24150 0 0 0 GICv3 57 Level > stm@40120000 This all will be timer@, but anyway I get your point. > 18: 0 22680 0 0 GICv3 58 Level > stm@40124000 > 19: 0 0 24110 0 GICv3 59 Level > stm@40128000 > 20: 0 0 0 21164 GICv3 60 Level > stm@4021c000 > > And: > > cat /sys/devices/system/clocksource/clocksource0/current_clocksource > stm@4011c000 > > cat /sys/devices/system/clockevents/clockevent*/current_device > stm@40120000 > stm@40124000 > stm@40128000 > stm@4021c000 Ack > > [ ... ] > >>> + >>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>> +{ >>> + struct device *dev = &pdev->dev; >>> + struct device_node *np = dev->of_node; >>> + struct stm_instances *stm_instances; >>> + const char *name = of_node_full_name(np); >>> + void __iomem *base; >>> + int irq, ret; >>> + struct clk *clk; >>> + >>> + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); >> >> No, you *cannot* drop the const. It's there on purpose. Match data >> should be const because it defines per variant differences. 
That's why >> the prototype of of_device_get_match_data() has such return type. >> You just want some global singleton, not match data. >> >>> + if (!stm_instances) { >>> + dev_err(dev, "No STM instances associated with a cpu"); >>> + return -EINVAL; >>> + } >>> + >>> + base = devm_of_iomap(dev, np, 0, NULL); >>> + if (IS_ERR(base)) { >>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >> >> You need to clean up the downstream code to match upstream. All of these >> should be return dev_err_probe(). > > Oh right, thanks for the reminder > >>> + return PTR_ERR(base); >>> + } >>> + >>> + irq = irq_of_parse_and_map(np, 0); >>> + if (irq <= 0) { >>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>> + return -EINVAL; >>> + } >>> + >>> + clk = devm_clk_get(dev, NULL); >>> + if (IS_ERR(clk)) { >>> + dev_err(dev, "Clock not found\n"); >> >> And this is clearly incorrect - spamming logs. Syntax is: >> return dev_err_probe >> >>> + return PTR_ERR(clk); >>> + } >>> + >>> + ret = clk_prepare_enable(clk); >> >> Drop, devm_clk_get_enabled. >> >>> + if (ret) { >>> + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); >>> + return ret; >>> + } >>> + >>> + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { >>> + >>> + /* >>> + * First probed STM will be a clocksource >>> + */ >>> + ret = nxp_stm_clocksource_init(dev, name, base, clk); >>> + if (ret) >>> + return ret; >>> + stm_instances->clocksource++; >> >> That's racy. Devices can be brought async, ideally. This should be >> rather idr or probably entire structure protected with a mutex. > > Mmh, interesting. I never had to think about this problem before > > Do you know at what moment the probing is parallelized ? You don't have PROBE_PREFER_ASYNCHRONOUS, so currently this will be still sync, but I don't think we want it to be that way forever. I think new drivers should not rely on implicit sync, because converting it later to async will be difficult. 
It's easier to design it now or even choose async explicitly (after testing). BTW, PREEMPT_RT and all fast-boot-use-cases would only be happier with probe being async. Best regards, Krzysztof
On 25/03/2025 13:21, Ghennadi Procopciuc wrote: > On 3/25/2025 2:09 PM, Daniel Lezcano wrote: >> On 25/03/2025 12:40, Ghennadi Procopciuc wrote: >>> On 3/25/2025 12:53 PM, Daniel Lezcano wrote: >>> [ ... ] >>>>>> +static int __init nxp_stm_clocksource_init(struct device *dev, const >>>>>> char *name, >>>>>> + void __iomem *base, struct clk *clk) >>>>>> +{ >>>>>> + struct stm_timer *stm_timer; >>>>>> + int ret; >>>>>> + >>>>>> + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); >>>>>> + if (!stm_timer) >>>>>> + return -ENOMEM; >>>>>> + >>>>>> + stm_timer->base = base; >>>>>> + stm_timer->rate = clk_get_rate(clk); >>>>>> + >>>>>> + stm_timer->scs.cs.name = name; >>>>>> + stm_timer->scs.cs.rating = 460; >>>>>> + stm_timer->scs.cs.read = nxp_stm_clocksource_read; >>>>>> + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; >>>>>> + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; >>>>>> + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; >>>>>> + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; >>>>>> + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); >>>>>> + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; >>>>>> + >>>>>> + ret = clocksource_register_hz(&stm_timer->scs.cs, >>>>>> stm_timer->rate); >>>>>> + if (ret) >>>>>> + return ret; >>>>> >>>>> clocksource_unregister during remove callback for cleanup? >>>> >>>> Sorry I don't get it :/ >>>> >>>> There is no cleanup after the clocksource_register_hz() is successful >>>> >>> >>> I noticed that other drivers, such as >>> drivers/clocksource/timer-tegra186.c and >>> drivers/clocksource/timer-sun5i.c, perform clocksource_unregister during >>> their platform_driver.remove callback. Shouldn't this apply here as well? >> >> The tegra186 registers with one probe several timers and clocksources. >> The timer-nxp probes for each node. >> >> The timer-sun5i.c has the remove callback but it is pointless as it can >> not be compiled as module. So it should not have it. >> > > Ok. 
> >>> [ ... ] >>>> >>>>>> +static int nxp_stm_clockevent_set_next_event(unsigned long delta, >>>>>> struct clock_event_device *ced) >>>>>> +{ >>>>>> + struct stm_timer *stm_timer = ced_to_stm(ced); >>>>>> + u32 val; >>>>>> + >>>>>> + nxp_stm_clockevent_disable(stm_timer); >>>>> >>>>> While examining the code base, I came across the >>>>> drivers/clocksource/timer-imx-gpt.c file, specifically the >>>>> mx1_2_set_next_event function, which includes a protection against >>>>> missing events. Using a similar approach would allow us to keep the STM >>>>> module enabled while only altering the channel's register state. This >>>>> risk can also be mitigated by adjusting min_delta_ns based on tick >>>>> frequency. >>>> >>>> How would you validate that ? >>>> >>> >>> How would I validate that this works? >>> >>> If this is the question, I see that the core performs an auto adjustment >>> of the minimum delta as part of the clockevents_program_min_delta >>> function when CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST is enabled. >>> Initially, I would examine how many times dev->set_next_event() returns >>> -ETIME. At the end of the function, min_delta_ns should reflect the >>> actual minimum delta the device can handle. >> >> That is an area of optimization and I would prefer to keep that as a >> separate change focused on validating this approach. >> > > This suggestion supports the argument presented below. > >>> [ ... 
] >>>>>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>>>>> +{ >>>>>> + struct device *dev = &pdev->dev; >>>>>> + struct device_node *np = dev->of_node; >>>>>> + struct stm_instances *stm_instances; >>>>>> + const char *name = of_node_full_name(np); >>>>>> + void __iomem *base; >>>>>> + int irq, ret; >>>>>> + struct clk *clk; >>>>>> + >>>>>> + stm_instances = >>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>> + if (!stm_instances) { >>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>> + return -EINVAL; >>>>>> + } >>>>>> + >>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>> + if (IS_ERR(base)) { >>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>> + return PTR_ERR(base); >>>>>> + } >>>>>> + >>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>> + if (irq <= 0) { >>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>> + return -EINVAL; >>>>>> + } >>>>> >>>>> From commit description: >>>>> >>>>>> The first probed STM is used as a clocksource, the second will be the >>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>> affinity set to a CPU. >>>>> >>>>> Why is the interrupt mandatory when the node is probed as a >>>>> clocksource? >>>> >>>> It relies on the DT description and it does not hurt to have a >>>> consistent code path for clockevent / clocksource even if the interrupt >>>> is not requested for the clocksource later. >>>> >>> >>> If so, in my opinion, it would make sense to use the same STM instance >>> for both the clocksource and broadcast clockevent, as both functions can >>> be accommodated within a single STM instance, which will help reduce the >>> number of STM instances used. >> >> The broadcast timer is stopped when it is unused which is the case for >> the s32 because there are no idle state powering down the local timers. >> They have to have be separated. 
>> > > This wouldn't be the case if the STM is kept running/counting during the > clock event setup, with only the clock event interrupt being disabled > (CCR.CEN). Are you asking to use two different channels for the same STM instance, one for the clocksource and one for the clockevent ?
On 3/25/2025 2:51 PM, Daniel Lezcano wrote: [ ... ] >>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>>>>>> +{ >>>>>>> + struct device *dev = &pdev->dev; >>>>>>> + struct device_node *np = dev->of_node; >>>>>>> + struct stm_instances *stm_instances; >>>>>>> + const char *name = of_node_full_name(np); >>>>>>> + void __iomem *base; >>>>>>> + int irq, ret; >>>>>>> + struct clk *clk; >>>>>>> + >>>>>>> + stm_instances = >>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>> + if (!stm_instances) { >>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>> + return -EINVAL; >>>>>>> + } >>>>>>> + >>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>> + if (IS_ERR(base)) { >>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>> + return PTR_ERR(base); >>>>>>> + } >>>>>>> + >>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>> + if (irq <= 0) { >>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>> + return -EINVAL; >>>>>>> + } >>>>>> >>>>>> From commit description: >>>>>> >>>>>>> The first probed STM is used as a clocksource, the second will be >>>>>>> the >>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>> affinity set to a CPU. >>>>>> >>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>> clocksource? >>>>> >>>>> It relies on the DT description and it does not hurt to have a >>>>> consistent code path for clockevent / clocksource even if the >>>>> interrupt >>>>> is not requested for the clocksource later. >>>>> >>>> >>>> If so, in my opinion, it would make sense to use the same STM instance >>>> for both the clocksource and broadcast clockevent, as both functions >>>> can >>>> be accommodated within a single STM instance, which will help reduce >>>> the >>>> number of STM instances used. >>> >>> The broadcast timer is stopped when it is unused which is the case for >>> the s32 because there are no idle state powering down the local timers. 
>>> They have to have be separated. >>> >> >> This wouldn't be the case if the STM is kept running/counting during the >> clock event setup, with only the clock event interrupt being disabled >> (CCR.CEN). > > Are you asking to use two different channels for the same STM instance, > one for the clocksource and one for the clockevent ? > I suggested using the CNT register to obtain the count for the clock source, while using one of the STM channels for the clock event.
On 25/03/2025 14:21, Ghennadi Procopciuc wrote: > On 3/25/2025 2:51 PM, Daniel Lezcano wrote: > [ ... ] >>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device *pdev) >>>>>>>> +{ >>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>> + struct stm_instances *stm_instances; >>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>> + void __iomem *base; >>>>>>>> + int irq, ret; >>>>>>>> + struct clk *clk; >>>>>>>> + >>>>>>>> + stm_instances = >>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>> + if (!stm_instances) { >>>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>>> + return -EINVAL; >>>>>>>> + } >>>>>>>> + >>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>> + if (IS_ERR(base)) { >>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>> + return PTR_ERR(base); >>>>>>>> + } >>>>>>>> + >>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>> + if (irq <= 0) { >>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>>> + return -EINVAL; >>>>>>>> + } >>>>>>> >>>>>>> From commit description: >>>>>>> >>>>>>>> The first probed STM is used as a clocksource, the second will be >>>>>>>> the >>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>> affinity set to a CPU. >>>>>>> >>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>> clocksource? >>>>>> >>>>>> It relies on the DT description and it does not hurt to have a >>>>>> consistent code path for clockevent / clocksource even if the >>>>>> interrupt >>>>>> is not requested for the clocksource later. >>>>>> >>>>> >>>>> If so, in my opinion, it would make sense to use the same STM instance >>>>> for both the clocksource and broadcast clockevent, as both functions >>>>> can >>>>> be accommodated within a single STM instance, which will help reduce >>>>> the >>>>> number of STM instances used. 
>>>> >>>> The broadcast timer is stopped when it is unused which is the case for >>>> the s32 because there are no idle state powering down the local timers. >>>> They have to have be separated. >>>> >>> >>> This wouldn't be the case if the STM is kept running/counting during the >>> clock event setup, with only the clock event interrupt being disabled >>> (CCR.CEN). >> >> Are you asking to use two different channels for the same STM instance, >> one for the clocksource and one for the clockevent ? >> > > I suggested using the CNT register to obtain the count for the clock > source, while using one of the STM channels for the clock event. Ah, ok. I think it is preferable to keep them separated to keep the code modular. Given the number of STM on the platform, it does not hurt
On 25/03/2025 13:30, Krzysztof Kozlowski wrote: > On 25/03/2025 13:23, Daniel Lezcano wrote: [ ... ] >>>> + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { >>>> + >>>> + /* >>>> + * First probed STM will be a clocksource >>>> + */ >>>> + ret = nxp_stm_clocksource_init(dev, name, base, clk); >>>> + if (ret) >>>> + return ret; >>>> + stm_instances->clocksource++; >>> >>> That's racy. Devices can be brought async, ideally. This should be >>> rather idr or probably entire structure protected with a mutex. >> >> Mmh, interesting. I never had to think about this problem before >> >> Do you know at what moment the probing is parallelized ? > > You don't have PROBE_PREFER_ASYNCHRONOUS, so currently this will be > still sync, but I don't think we want it to be that way forever. I think > new drivers should not rely on implicit sync, because converting it > later to async will be difficult. It's easier to design it now or even > choose async explicitly (after testing). I gave it a try and sometimes I hit the warnings below. I suspect the underlying code in the time framework is not yet ready for that. Even if it could be a good candidate for parallelizing the boot, this driver should stay sync ATM, unless someone is willing to dig into the core framework to find out the race when switching the clockevent. I think a thread is setting a timer while we are switching the driver. IMO, this core framework is too sensitive for this kind of change now. Alternatively, I can add the lock anyway, which is harmless for the sync code but makes the driver race free. The async flag can be added later. 
[ 2.066807] ------------[ cut here ]------------ [ 2.073190] SPI driver st-magn-spi has no spi_device_id for st,lsm9ds1-magn [ 2.077841] Current state: 0 [ 2.077866] WARNING: CPU: 0 PID: 0 at kernel/time/clockevents.c:319 clockevents_program_event+0x124/0x12c [ 2.084972] SPI driver st-magn-spi has no spi_device_id for st,lsm303c-magn [ 2.087876] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.14.0-rc4-00009-g1418f8fd0e24-dirty #163 [ 2.087889] Hardware name: NXP S32G2 Reference Design Board 2 (S32G-VNP-RDB2) (DT) [ 2.121868] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.128959] pc : clockevents_program_event+0x124/0x12c [ 2.134196] lr : clockevents_program_event+0x124/0x12c [ 2.139437] sp : ffff800080003e50 [ 2.142808] x29: ffff800080003e50 x28: ffff00085f899538 x27: ffff00085f899578 [ 2.150092] x26: ffff00085f8995b8 x25: ffff00085f89948c x24: 7fffffffffffffff [ 2.157368] x23: 0000000000000003 x22: 000000007233f8db x21: 0000000000000000 [ 2.164643] x20: 000000007270e000 x19: ffff0008001286c0 x18: 00000000ffffffff [ 2.171918] x17: ffff8007dd240000 x16: ffff800080000000 x15: ffff8000800039f0 [ 2.179195] x14: 0000000000000000 x13: ffff800082a864f8 x12: 0000000000000381 [ 2.186470] x11: 000000000000012b x10: ffff800082ade4f8 x9 : ffff800082a864f8 [ 2.193746] x8 : 00000000ffffefff x7 : ffff800082ade4f8 x6 : 80000000fffff000 [ 2.201023] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000ffffffff [ 2.208297] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff800082a78080 [ 2.215574] Call trace: [ 2.218063] clockevents_program_event+0x124/0x12c (P) [ 2.223304] tick_program_event+0x50/0x9c [ 2.227392] hrtimer_interrupt+0x120/0x254 [ 2.231571] nxp_stm_clockevent_interrupt+0x8c/0x9c [ 2.236545] __handle_irq_event_percpu+0x48/0x13c [ 2.241345] handle_irq_event+0x4c/0xa8 [ 2.245256] handle_fasteoi_irq+0xa4/0x230 [ 2.249431] handle_irq_desc+0x40/0x58 [ 2.253254] generic_handle_domain_irq+0x1c/0x28 [ 2.257961] gic_handle_irq+0x4c/0x118 [ 2.261783] 
call_on_irq_stack+0x24/0x4c [ 2.265784] do_interrupt_handler+0x80/0x84 [ 2.270049] el1_interrupt+0x34/0x68 [ 2.273696] el1h_64_irq_handler+0x18/0x24 [ 2.277873] el1h_64_irq+0x6c/0x70 [ 2.281340] default_idle_call+0x28/0x3c (P) [ 2.285693] do_idle+0x1f8/0x250 [ 2.288988] cpu_startup_entry+0x34/0x3c [ 2.292988] kernel_init+0x0/0x1d4 [ 2.296454] start_kernel+0x5e4/0x72c [ 2.300188] __primary_switched+0x88/0x90 [ 2.304283] ---[ end trace 0000000000000000 ]--- [ 2.309891] hw perfevents: enabled with armv8_cortex_a53 PMU driver, 7 (0,8000003f) counters available [ 2.320001] cs_system_cfg: CoreSight Configuration manager initialised [ 2.327712] gnss: GNSS driver registered with major 506 [ 2.340793] GACT probability NOT on [ 2.344401] Mirror/redirect action on [ 2.348835] IPVS: Registered protocols () [ 2.352946] IPVS: Connection hash table configured (size=4096, memory=32Kbytes) [ 2.360566] IPVS: ipvs loaded. [ 2.363904] NET: Registered PF_INET6 protocol family Or this one: [ 2.369946] ------------[ cut here ]------------ [ 2.370044] Segment Routing with IPv6 [ 2.374649] Current state: 0 [ 2.374668] WARNING: CPU: 0 PID: 0 at kernel/time/clockevents.c:125 clockevents_switch_state+0x10c/0x114 [ 2.378462] In-situ OAM (IOAM) with IPv6 [ 2.381328] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.14.0-rc4-00009-g1418f8fd0e24-dirty #163 [ 2.381341] Tainted: [W]=WARN [ 2.391115] sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver [ 2.394964] Hardware name: NXP S32G2 Reference Design Board 2 (S32G-VNP-RDB2) (DT) [ 2.394971] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 2.394980] pc : clockevents_switch_state+0x10c/0x114 [ 2.406609] NET: Registered PF_PACKET protocol family [ 2.408957] lr : clockevents_switch_state+0x10c/0x114 [ 2.408971] sp : ffff800080003cf0 [ 2.408975] x29: ffff800080003cf0 [ 2.415066] Bridge firewalling registered [ 2.422692] x28: ffff00085f89fc40 x27: 0000000000000001 [ 2.422704] x26: ffff800082a6f0e0 x25: ffff80008265c400 x24: 
ffff00085f89fbc0 [ 2.468656] x23: 0000000000000001 x22: ffff00085f899480 x21: 0000000000000001 [ 2.475931] x20: 0000000000000004 x19: ffff0008001286c0 x18: 00000000ffffffff [ 2.483208] x17: ffff8007dd240000 x16: ffff800080000000 x15: ffff800080003890 [ 2.490484] x14: 0000000000000000 x13: ffff800082a864f8 x12: 0000000000000420 [ 2.497763] x11: 0000000000000160 x10: ffff800082ade4f8 x9 : ffff800082a864f8 [ 2.505038] x8 : 00000000ffffefff x7 : ffff800082ade4f8 x6 : 80000000fffff000 [ 2.512314] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000ffffffff [ 2.519588] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff800082a78080 [ 2.526865] Call trace: [ 2.529352] clockevents_switch_state+0x10c/0x114 (P) [ 2.534505] tick_program_event+0x6c/0x9c [ 2.538590] __remove_hrtimer+0xb0/0xb4 [ 2.542506] hrtimer_try_to_cancel.part.0+0xc8/0xd0 [ 2.547478] hrtimer_try_to_cancel+0x6c/0x78 [ 2.551832] task_contending+0xd4/0x13c [ 2.555746] enqueue_dl_entity+0x214/0x500 [ 2.559925] dl_server_start+0x50/0x138 [ 2.563835] enqueue_task_fair+0x1dc/0x5e0 [ 2.568012] activate_task+0x4c/0x90 [ 2.571660] ttwu_do_activate.isra.0+0x58/0x138 [ 2.576281] sched_ttwu_pending+0xa4/0x128 [ 2.580459] __flush_smp_call_function_queue+0xf0/0x224 [ 2.585790] generic_smp_call_function_single_interrupt+0x14/0x20 [ 2.592002] ipi_handler+0x134/0x168 [ 2.595650] handle_percpu_devid_irq+0x80/0x120 [ 2.600269] handle_irq_desc+0x40/0x58 [ 2.604091] generic_handle_domain_irq+0x1c/0x28 [ 2.608798] gic_handle_irq+0x4c/0x118 [ 2.612618] call_on_irq_stack+0x24/0x4c [ 2.616617] do_interrupt_handler+0x80/0x84 [ 2.620881] el1_interrupt+0x34/0x68 [ 2.624526] el1h_64_irq_handler+0x18/0x24 [ 2.628700] el1h_64_irq+0x6c/0x70 [ 2.632166] default_idle_call+0x28/0x3c (P) [ 2.636520] do_idle+0x1f8/0x250 [ 2.639812] cpu_startup_entry+0x38/0x3c [ 2.643814] kernel_init+0x0/0x1d4 [ 2.647281] start_kernel+0x5e4/0x72c [ 2.651014] __primary_switched+0x88/0x90 [ 2.655107] ---[ end trace 0000000000000000 ]---
On 25/03/2025 19:38, Daniel Lezcano wrote: > On 25/03/2025 13:30, Krzysztof Kozlowski wrote: >> On 25/03/2025 13:23, Daniel Lezcano wrote: > > [ ... ] > >>>>> + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { >>>>> + >>>>> + /* >>>>> + * First probed STM will be a clocksource >>>>> + */ >>>>> + ret = nxp_stm_clocksource_init(dev, name, base, clk); >>>>> + if (ret) >>>>> + return ret; >>>>> + stm_instances->clocksource++; >>>> >>>> That's racy. Devices can be brought async, ideally. This should be >>>> rather idr or probably entire structure protected with a mutex. >>> >>> Mmh, interesting. I never had to think about this problem before >>> >>> Do you know at what moment the probing is parallelized ? >> >> You don't have PROBE_PREFER_ASYNCHRONOUS, so currently this will be >> still sync, but I don't think we want it to be that way forever. I think >> new drivers should not rely on implicit sync, because converting it >> later to async will be difficult. It's easier to design it now or even >> choose async explicitly (after testing). > > I gave a try and sometimes I reach the warnings below. I suspect the > underlying code in the time framework is not yet ready for that. > > Even if it could be a good candidate for parallelizing the boot, this > driver should stay sync ATM. Except if someone has the willing to dig > into the core framework to find out the race when switching the > clockevent. I think a thread is setting a timer while we are switching > the driver. > > IMO, this core framework is too sensitive for this kind of change now. > > Alternatively, I can put anyway the lock which is harmless for the sync > code but making the driver race free. The async flag can be put later. Yes, that's what I meant, although indeed good point that clocksource is way too early to be async. In that case this part is up to you, maybe my suggestion was not correct. Best regards, Krzysztof
On 3/25/2025 3:54 PM, Daniel Lezcano wrote: > On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: >> [ ... ] >>>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device >>>>>>>>> *pdev) >>>>>>>>> +{ >>>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>>> + struct stm_instances *stm_instances; >>>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>>> + void __iomem *base; >>>>>>>>> + int irq, ret; >>>>>>>>> + struct clk *clk; >>>>>>>>> + >>>>>>>>> + stm_instances = >>>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>>> + if (!stm_instances) { >>>>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>>>> + return -EINVAL; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>>> + if (IS_ERR(base)) { >>>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>>> + return PTR_ERR(base); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>>> + if (irq <= 0) { >>>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>>>> + return -EINVAL; >>>>>>>>> + } >>>>>>>> >>>>>>>> From commit description: >>>>>>>> >>>>>>>>> The first probed STM is used as a clocksource, the second will be >>>>>>>>> the >>>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>>> affinity set to a CPU. >>>>>>>> >>>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>>> clocksource? >>>>>>> >>>>>>> It relies on the DT description and it does not hurt to have a >>>>>>> consistent code path for clockevent / clocksource even if the >>>>>>> interrupt >>>>>>> is not requested for the clocksource later. 
>>>>>>> >>>>>> >>>>>> If so, in my opinion, it would make sense to use the same STM >>>>>> instance >>>>>> for both the clocksource and broadcast clockevent, as both functions >>>>>> can >>>>>> be accommodated within a single STM instance, which will help reduce >>>>>> the >>>>>> number of STM instances used. >>>>> >>>>> The broadcast timer is stopped when it is unused which is the case for >>>>> the s32 because there are no idle state powering down the local >>>>> timers. >>>>> They have to have be separated. >>>>> >>>> >>>> This wouldn't be the case if the STM is kept running/counting during >>>> the >>>> clock event setup, with only the clock event interrupt being disabled >>>> (CCR.CEN). >>> >>> Are you asking to use two different channels for the same STM instance, >>> one for the clocksource and one for the clockevent ? >>> >> >> I suggested using the CNT register to obtain the count for the clock >> source, while using one of the STM channels for the clock event. > > Ah, ok. > > I think it is preferable to keep them separated to keep the code > modular. Given the number of STM on the platform, it does not hurt > The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is expected to run on Cortex-A53 cores, while other software stacks will operate on Cortex-M cores. The number of STM instances has been sized to include at most one instance per core. Allocating six instances (1 clock source, 1 broadcast clock event, and 4 clock events for all A53 cores) to Linux on the S32G2 leaves the M7 software stacks without adequate STM coverage. Additionally, the proposed implementation uses only one STM channel out of four, which is not optimal hardware usage. I suggest using all STM channels instead of limiting it to a single channel per instance, given that the driver already uses a global structure to pair STM instances with cores. This approach will optimize the number of instances required by Linux and leverage the capabilities of each STM.
On 26/03/2025 08:44, Ghennadi Procopciuc wrote: > On 3/25/2025 3:54 PM, Daniel Lezcano wrote: >> On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >>> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: [ ... ] >>>>> >>>>> This wouldn't be the case if the STM is kept running/counting during >>>>> the >>>>> clock event setup, with only the clock event interrupt being disabled >>>>> (CCR.CEN). >>>> >>>> Are you asking to use two different channels for the same STM instance, >>>> one for the clocksource and one for the clockevent ? >>>> >>> >>> I suggested using the CNT register to obtain the count for the clock >>> source, while using one of the STM channels for the clock event. >> >> Ah, ok. >> >> I think it is preferable to keep them separated to keep the code >> modular. Given the number of STM on the platform, it does not hurt >> > > The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is > expected to run on Cortex-A53 cores, while other software stacks will > operate on Cortex-M cores. The number of STM instances has been sized to > include at most one instance per core. Allocating six instances (1 clock > source, 1 broadcast clock event, and 4 clock events for all A53 cores) > to Linux on the S32G2 leaves the M7 software stacks without adequate STM > coverage. Mmh, right. From this perspective it makes sense. > Additionally, the proposed implementation uses only one STM channel out > of four, which is not optimal hardware usage. I suggest using all STM > channels instead of limiting it to a single channel per instance, given > that the driver already uses a global structure to pair STM instances > with cores. This approach will optimize the number of instances required > by Linux and leverage the capabilities of each STM.
On 26/03/2025 08:44, Ghennadi Procopciuc wrote: > On 3/25/2025 3:54 PM, Daniel Lezcano wrote: >> On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >>> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: >>> [ ... ] >>>>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device >>>>>>>>>> *pdev) >>>>>>>>>> +{ >>>>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>>>> + struct stm_instances *stm_instances; >>>>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>>>> + void __iomem *base; >>>>>>>>>> + int irq, ret; >>>>>>>>>> + struct clk *clk; >>>>>>>>>> + >>>>>>>>>> + stm_instances = >>>>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>>>> + if (!stm_instances) { >>>>>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>>>>> + return -EINVAL; >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>>>> + if (IS_ERR(base)) { >>>>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>>>> + return PTR_ERR(base); >>>>>>>>>> + } >>>>>>>>>> + >>>>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>>>> + if (irq <= 0) { >>>>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>>>>> + return -EINVAL; >>>>>>>>>> + } >>>>>>>>> >>>>>>>>> From commit description: >>>>>>>>> >>>>>>>>>> The first probed STM is used as a clocksource, the second will be >>>>>>>>>> the >>>>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>>>> affinity set to a CPU. >>>>>>>>> >>>>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>>>> clocksource? >>>>>>>> >>>>>>>> It relies on the DT description and it does not hurt to have a >>>>>>>> consistent code path for clockevent / clocksource even if the >>>>>>>> interrupt >>>>>>>> is not requested for the clocksource later. 
>>>>>>>> >>>>>>> >>>>>>> If so, in my opinion, it would make sense to use the same STM >>>>>>> instance >>>>>>> for both the clocksource and broadcast clockevent, as both functions >>>>>>> can >>>>>>> be accommodated within a single STM instance, which will help reduce >>>>>>> the >>>>>>> number of STM instances used. >>>>>> >>>>>> The broadcast timer is stopped when it is unused which is the case for >>>>>> the s32 because there are no idle state powering down the local >>>>>> timers. >>>>>> They have to have be separated. >>>>>> >>>>> >>>>> This wouldn't be the case if the STM is kept running/counting during >>>>> the >>>>> clock event setup, with only the clock event interrupt being disabled >>>>> (CCR.CEN). >>>> >>>> Are you asking to use two different channels for the same STM instance, >>>> one for the clocksource and one for the clockevent ? >>>> >>> >>> I suggested using the CNT register to obtain the count for the clock >>> source, while using one of the STM channels for the clock event. >> >> Ah, ok. >> >> I think it is preferable to keep them separated to keep the code >> modular. Given the number of STM on the platform, it does not hurt >> > > The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is > expected to run on Cortex-A53 cores, while other software stacks will > operate on Cortex-M cores. The number of STM instances has been sized to > include at most one instance per core. Allocating six instances (1 clock > source, 1 broadcast clock event, and 4 clock events for all A53 cores) > to Linux on the S32G2 leaves the M7 software stacks without adequate STM > coverage. Given this description I'm wondering why one STM has to be used as a clocksource if the STM_07 is already a TS one. And also, we can get rid of the broadcast timer as there is no idle state forcing a switch to an always-on timer. IIUC, on the S32g2 there are 4 x Cortex-A53 and 3 x Cortex-M3, that means we need 7 timers. 
The system has 7 timers + a special one for timestamp. So if I follow the reasoning, the broadcast timer should not be used, otherwise one Cortex-M3 will end up without a timer. That leads us to one clocksource taken from the first per-CPU timer initialized, or alternatively one STM instance == 1 clocksource and 1 clockevent, which makes things simpler
On 3/26/2025 11:19 AM, Daniel Lezcano wrote: > On 26/03/2025 08:44, Ghennadi Procopciuc wrote: >> On 3/25/2025 3:54 PM, Daniel Lezcano wrote: >>> On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >>>> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: >>>> [ ... ] >>>>>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device >>>>>>>>>>> *pdev) >>>>>>>>>>> +{ >>>>>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>>>>> + struct stm_instances *stm_instances; >>>>>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>>>>> + void __iomem *base; >>>>>>>>>>> + int irq, ret; >>>>>>>>>>> + struct clk *clk; >>>>>>>>>>> + >>>>>>>>>>> + stm_instances = >>>>>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>>>>> + if (!stm_instances) { >>>>>>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>>>>>> + return -EINVAL; >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>>>>> + if (IS_ERR(base)) { >>>>>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>>>>> + return PTR_ERR(base); >>>>>>>>>>> + } >>>>>>>>>>> + >>>>>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>>>>> + if (irq <= 0) { >>>>>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>>>>>> + return -EINVAL; >>>>>>>>>>> + } >>>>>>>>>> >>>>>>>>>> From commit description: >>>>>>>>>> >>>>>>>>>>> The first probed STM is used as a clocksource, the second >>>>>>>>>>> will be >>>>>>>>>>> the >>>>>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>>>>> affinity set to a CPU. >>>>>>>>>> >>>>>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>>>>> clocksource? >>>>>>>>> >>>>>>>>> It relies on the DT description and it does not hurt to have a >>>>>>>>> consistent code path for clockevent / clocksource even if the >>>>>>>>> interrupt >>>>>>>>> is not requested for the clocksource later. 
>>>>>>>>> >>>>>>>> >>>>>>>> If so, in my opinion, it would make sense to use the same STM >>>>>>>> instance >>>>>>>> for both the clocksource and broadcast clockevent, as both >>>>>>>> functions >>>>>>>> can >>>>>>>> be accommodated within a single STM instance, which will help >>>>>>>> reduce >>>>>>>> the >>>>>>>> number of STM instances used. >>>>>>> >>>>>>> The broadcast timer is stopped when it is unused which is the >>>>>>> case for >>>>>>> the s32 because there are no idle state powering down the local >>>>>>> timers. >>>>>>> They have to have be separated. >>>>>>> >>>>>> >>>>>> This wouldn't be the case if the STM is kept running/counting during >>>>>> the >>>>>> clock event setup, with only the clock event interrupt being disabled >>>>>> (CCR.CEN). >>>>> >>>>> Are you asking to use two different channels for the same STM >>>>> instance, >>>>> one for the clocksource and one for the clockevent ? >>>>> >>>> >>>> I suggested using the CNT register to obtain the count for the clock >>>> source, while using one of the STM channels for the clock event. >>> >>> Ah, ok. >>> >>> I think it is preferable to keep them separated to keep the code >>> modular. Given the number of STM on the platform, it does not hurt >>> >> >> The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is >> expected to run on Cortex-A53 cores, while other software stacks will >> operate on Cortex-M cores. The number of STM instances has been sized to >> include at most one instance per core. Allocating six instances (1 clock >> source, 1 broadcast clock event, and 4 clock events for all A53 cores) >> to Linux on the S32G2 leaves the M7 software stacks without adequate STM >> coverage. > > Given this description I'm wondering why one STM has to be used as a > clocksource if the STM_07 is already a TS one. And also, we can get rid > of the broadcast timer as there is no idle state forcing a switch to an > always-on timer. 
Indeed, STM_07 can be used as a system clock source, but Linux should not assume ownership of it. > > IIUC, on the S32g2 there are 4 x Cortex-A53 and 3 x Cortex-M3, that > means we need 7 timers. > > The system has 7 timers + a special one for timestamp. > > So if I follow the reasoning, the broadcast timer should not be used > otherwise one cortex-M3 will end up without a timer. > On the S32G2, there are eight STM timers and STM_TS. Therefore, there should be enough instances to accommodate 4xA53 and 3xM7 cores. > That leads us to one clocksource for the first per CPU timer initialized > or alternatively one STM instance == 1 clocksource and 1 clockevent > which makes things simpler > I'm not sure I understood the entire description. As I see it, two STM instances should be used to accommodate one clock source, a broadcast clock event, and four clock events—one per core. E.g. STM_0 - clocksource (based on CNT) - broadcast clock event (channel 0) STM_1 - Cortex-A53 0 (channel 0) - Cortex-A53 1 (channel 1) - Cortex-A53 2 (channel 2) - Cortex-A53 3 (channel 3)
On 26/03/2025 10:57, Ghennadi Procopciuc wrote: > On 3/26/2025 11:19 AM, Daniel Lezcano wrote: >> On 26/03/2025 08:44, Ghennadi Procopciuc wrote: >>> On 3/25/2025 3:54 PM, Daniel Lezcano wrote: >>>> On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >>>>> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: >>>>> [ ... ] >>>>>>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device >>>>>>>>>>>> *pdev) >>>>>>>>>>>> +{ >>>>>>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>>>>>> + struct stm_instances *stm_instances; >>>>>>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>>>>>> + void __iomem *base; >>>>>>>>>>>> + int irq, ret; >>>>>>>>>>>> + struct clk *clk; >>>>>>>>>>>> + >>>>>>>>>>>> + stm_instances = >>>>>>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>>>>>> + if (!stm_instances) { >>>>>>>>>>>> + dev_err(dev, "No STM instances associated with a cpu"); >>>>>>>>>>>> + return -EINVAL; >>>>>>>>>>>> + } >>>>>>>>>>>> + >>>>>>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>>>>>> + if (IS_ERR(base)) { >>>>>>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>>>>>> + return PTR_ERR(base); >>>>>>>>>>>> + } >>>>>>>>>>>> + >>>>>>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>>>>>> + if (irq <= 0) { >>>>>>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); >>>>>>>>>>>> + return -EINVAL; >>>>>>>>>>>> + } >>>>>>>>>>> >>>>>>>>>>> From commit description: >>>>>>>>>>> >>>>>>>>>>>> The first probed STM is used as a clocksource, the second >>>>>>>>>>>> will be >>>>>>>>>>>> the >>>>>>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>>>>>> affinity set to a CPU. >>>>>>>>>>> >>>>>>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>>>>>> clocksource? 
>>>>>>>>>> >>>>>>>>>> It relies on the DT description and it does not hurt to have a >>>>>>>>>> consistent code path for clockevent / clocksource even if the >>>>>>>>>> interrupt >>>>>>>>>> is not requested for the clocksource later. >>>>>>>>>> >>>>>>>>> >>>>>>>>> If so, in my opinion, it would make sense to use the same STM >>>>>>>>> instance >>>>>>>>> for both the clocksource and broadcast clockevent, as both >>>>>>>>> functions >>>>>>>>> can >>>>>>>>> be accommodated within a single STM instance, which will help >>>>>>>>> reduce >>>>>>>>> the >>>>>>>>> number of STM instances used. >>>>>>>> >>>>>>>> The broadcast timer is stopped when it is unused which is the >>>>>>>> case for >>>>>>>> the s32 because there are no idle state powering down the local >>>>>>>> timers. >>>>>>>> They have to have be separated. >>>>>>>> >>>>>>> >>>>>>> This wouldn't be the case if the STM is kept running/counting during >>>>>>> the >>>>>>> clock event setup, with only the clock event interrupt being disabled >>>>>>> (CCR.CEN). >>>>>> >>>>>> Are you asking to use two different channels for the same STM >>>>>> instance, >>>>>> one for the clocksource and one for the clockevent ? >>>>>> >>>>> >>>>> I suggested using the CNT register to obtain the count for the clock >>>>> source, while using one of the STM channels for the clock event. >>>> >>>> Ah, ok. >>>> >>>> I think it is preferable to keep them separated to keep the code >>>> modular. Given the number of STM on the platform, it does not hurt >>>> >>> >>> The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is >>> expected to run on Cortex-A53 cores, while other software stacks will >>> operate on Cortex-M cores. The number of STM instances has been sized to >>> include at most one instance per core. Allocating six instances (1 clock >>> source, 1 broadcast clock event, and 4 clock events for all A53 cores) >>> to Linux on the S32G2 leaves the M7 software stacks without adequate STM >>> coverage. 
>> >> Given this description I'm wondering why one STM has to be used as a >> clocksource if the STM_07 is already a TS one. And also, we can get rid >> of the broadcast timer as there is no idle state forcing a switch to an >> always-on timer. > > Indeed, STM_07 can be used as a system clock source, but Linux should > not assume ownership of it. > >> >> IIUC, on the S32g2 there are 4 x Cortex-A53 and 3 x Cortex-M3, that >> means we need 7 timers. >> >> The system has 7 timers + a special one for timestamp. >> >> So if I follow the reasoning, the broadcast timer should not be used >> otherwise one cortex-M3 will end up without a timer. >> > > On the S32G2, there are eight STM timers and STM_TS. Therefore, there > should be enough instances to accommodate 4xA53 and 3xM7 cores. > >> That leads us to one clocksource for the first per CPU timer initialized >> or alternatively one STM instance == 1 clocksource and 1 clockevent >> which makes things simpler >> > I'm not sure I understood the entire description. As I see it, two STM > instances should be used to accommodate one clock source, a broadcast > clock event, and four clock events—one per core. What I meant is to do something simpler: ----------------- cat /proc/interrupts 16: 7891 0 0 0 GICv3 56 Level stm@4011c000 17: 0 5326 0 0 GICv3 57 Level stm@40120000 18: 3 0 16668 0 GICv3 58 Level stm@40124000 19: 0 0 0 5152 GICv3 59 Level stm@40128000 ------------------ cat /sys/devices/system/clockevents/clockevent*/current_device stm@4011c000 stm@40120000 stm@40124000 stm@40128000 ------------------ cat /sys/devices/system/clocksource/clocksource0/available_clocksource stm@4011c000 stm@40120000 stm@40124000 stm@40128000 arch_sys_counter On the S32G2: 4 STM instances used for one clocksource and one clockevent On the S32G3: 8 STM instances used for one clocksource and one clockevent Specific broadcast timer is not needed as the s32g system. The resulting timer driver code is much simpler. > E.g. 
> STM_0 > - clocksource (based on CNT) > - broadcast clock event (channel 0) > > STM_1 > - Cortex-A53 0 (channel 0) > - Cortex-A53 1 (channel 1) > - Cortex-A53 2 (channel 2) > - Cortex-A53 3 (channel 3) >
On 3/26/2025 12:31 PM, Daniel Lezcano wrote: > On 26/03/2025 10:57, Ghennadi Procopciuc wrote: >> On 3/26/2025 11:19 AM, Daniel Lezcano wrote: >>> On 26/03/2025 08:44, Ghennadi Procopciuc wrote: >>>> On 3/25/2025 3:54 PM, Daniel Lezcano wrote: >>>>> On 25/03/2025 14:21, Ghennadi Procopciuc wrote: >>>>>> On 3/25/2025 2:51 PM, Daniel Lezcano wrote: >>>>>> [ ... ] >>>>>>>>>>>>> +static int __init nxp_stm_timer_probe(struct platform_device >>>>>>>>>>>>> *pdev) >>>>>>>>>>>>> +{ >>>>>>>>>>>>> + struct device *dev = &pdev->dev; >>>>>>>>>>>>> + struct device_node *np = dev->of_node; >>>>>>>>>>>>> + struct stm_instances *stm_instances; >>>>>>>>>>>>> + const char *name = of_node_full_name(np); >>>>>>>>>>>>> + void __iomem *base; >>>>>>>>>>>>> + int irq, ret; >>>>>>>>>>>>> + struct clk *clk; >>>>>>>>>>>>> + >>>>>>>>>>>>> + stm_instances = >>>>>>>>>>>>> (typeof(stm_instances))of_device_get_match_data(dev); >>>>>>>>>>>>> + if (!stm_instances) { >>>>>>>>>>>>> + dev_err(dev, "No STM instances associated with a >>>>>>>>>>>>> cpu"); >>>>>>>>>>>>> + return -EINVAL; >>>>>>>>>>>>> + } >>>>>>>>>>>>> + >>>>>>>>>>>>> + base = devm_of_iomap(dev, np, 0, NULL); >>>>>>>>>>>>> + if (IS_ERR(base)) { >>>>>>>>>>>>> + dev_err(dev, "Failed to iomap %pOFn\n", np); >>>>>>>>>>>>> + return PTR_ERR(base); >>>>>>>>>>>>> + } >>>>>>>>>>>>> + >>>>>>>>>>>>> + irq = irq_of_parse_and_map(np, 0); >>>>>>>>>>>>> + if (irq <= 0) { >>>>>>>>>>>>> + dev_err(dev, "Failed to parse and map IRQ: %d\n", >>>>>>>>>>>>> irq); >>>>>>>>>>>>> + return -EINVAL; >>>>>>>>>>>>> + } >>>>>>>>>>>> >>>>>>>>>>>> From commit description: >>>>>>>>>>>> >>>>>>>>>>>>> The first probed STM is used as a clocksource, the second >>>>>>>>>>>>> will be >>>>>>>>>>>>> the >>>>>>>>>>>>> broadcast timer and the rest are used as a clockevent with the >>>>>>>>>>>>> affinity set to a CPU. >>>>>>>>>>>> >>>>>>>>>>>> Why is the interrupt mandatory when the node is probed as a >>>>>>>>>>>> clocksource? 
>>>>>>>>>>> >>>>>>>>>>> It relies on the DT description and it does not hurt to have a >>>>>>>>>>> consistent code path for clockevent / clocksource even if the >>>>>>>>>>> interrupt >>>>>>>>>>> is not requested for the clocksource later. >>>>>>>>>>> >>>>>>>>>> >>>>>>>>>> If so, in my opinion, it would make sense to use the same STM >>>>>>>>>> instance >>>>>>>>>> for both the clocksource and broadcast clockevent, as both >>>>>>>>>> functions >>>>>>>>>> can >>>>>>>>>> be accommodated within a single STM instance, which will help >>>>>>>>>> reduce >>>>>>>>>> the >>>>>>>>>> number of STM instances used. >>>>>>>>> >>>>>>>>> The broadcast timer is stopped when it is unused which is the >>>>>>>>> case for >>>>>>>>> the s32 because there are no idle state powering down the local >>>>>>>>> timers. >>>>>>>>> They have to have be separated. >>>>>>>>> >>>>>>>> >>>>>>>> This wouldn't be the case if the STM is kept running/counting >>>>>>>> during >>>>>>>> the >>>>>>>> clock event setup, with only the clock event interrupt being >>>>>>>> disabled >>>>>>>> (CCR.CEN). >>>>>>> >>>>>>> Are you asking to use two different channels for the same STM >>>>>>> instance, >>>>>>> one for the clocksource and one for the clockevent ? >>>>>>> >>>>>> >>>>>> I suggested using the CNT register to obtain the count for the clock >>>>>> source, while using one of the STM channels for the clock event. >>>>> >>>>> Ah, ok. >>>>> >>>>> I think it is preferable to keep them separated to keep the code >>>>> modular. Given the number of STM on the platform, it does not hurt >>>>> >>>> >>>> The S32G2 and S32G3 are SoCs featuring a diverse set of cores. Linux is >>>> expected to run on Cortex-A53 cores, while other software stacks will >>>> operate on Cortex-M cores. The number of STM instances has been >>>> sized to >>>> include at most one instance per core. 
Allocating six instances (1 >>>> clock >>>> source, 1 broadcast clock event, and 4 clock events for all A53 cores) >>>> to Linux on the S32G2 leaves the M7 software stacks without adequate >>>> STM >>>> coverage. >>> >>> Given this description I'm wondering why one STM has to be used as a >>> clocksource if the STM_07 is already a TS one. And also, we can get rid >>> of the broadcast timer as there is no idle state forcing a switch to an >>> always-on timer. >> >> Indeed, STM_07 can be used as a system clock source, but Linux should >> not assume ownership of it. >> >>> >>> IIUC, on the S32g2 there are 4 x Cortex-A53 and 3 x Cortex-M3, that >>> means we need 7 timers. >>> >>> The system has 7 timers + a special one for timestamp. >>> >>> So if I follow the reasoning, the broadcast timer should not be used >>> otherwise one cortex-M3 will end up without a timer. >>> >> >> On the S32G2, there are eight STM timers and STM_TS. Therefore, there >> should be enough instances to accommodate 4xA53 and 3xM7 cores. >> >>> That leads us to one clocksource for the first per CPU timer initialized >>> or alternatively one STM instance == 1 clocksource and 1 clockevent >>> which makes things simpler >>> >> I'm not sure I understood the entire description. As I see it, two STM >> instances should be used to accommodate one clock source, a broadcast >> clock event, and four clock events—one per core. 
> > What I meant is to do something simpler: > > ----------------- > > cat /proc/interrupts > > 16: 7891 0 0 0 GICv3 56 Level > stm@4011c000 > 17: 0 5326 0 0 GICv3 57 Level > stm@40120000 > 18: 3 0 16668 0 GICv3 58 Level > stm@40124000 > 19: 0 0 0 5152 GICv3 59 Level > stm@40128000 > > ------------------ > > cat /sys/devices/system/clockevents/clockevent*/current_device > > stm@4011c000 > stm@40120000 > stm@40124000 > stm@40128000 > > ------------------ > > cat /sys/devices/system/clocksource/clocksource0/available_clocksource > > stm@4011c000 stm@40120000 stm@40124000 stm@40128000 arch_sys_counter > > > > On the S32G2: 4 STM instances used for one clocksource and one clockevent > > On the S32G3: 8 STM instances used for one clocksource and one clockevent > > > A specific broadcast timer is not needed on the s32g system. > > > The resulting timer driver code is much simpler. > Okay, it makes sense from a complexity standpoint, even though it doubles the number of STM modules used, while keeping the required number of STM modules aligned with the number of cores. > >> E.g. >> STM_0 >> - clocksource (based on CNT) >> - broadcast clock event (channel 0) >> >> STM_1 >> - Cortex-A53 0 (channel 0) >> - Cortex-A53 1 (channel 1) >> - Cortex-A53 2 (channel 2) >> - Cortex-A53 3 (channel 3) >> > >
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 487c85259967..e86e327392af 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -763,4 +763,13 @@ config RALINK_TIMER Enables support for system tick counter present on Ralink SoCs RT3352 and MT7620. +config NXP_STM_TIMER + bool "NXP System Timer Module driver" + depends on ARCH_S32 || COMPILE_TEST + select CLKSRC_MMIO + help + Support for NXP System Timer Module. It will create, in this + order, a clocksource, a broadcast clockevent and a per cpu + clockevent. + endmenu diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 43ef16a4efa6..c3a92e6b9f94 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -92,3 +92,5 @@ obj-$(CONFIG_GXP_TIMER) += timer-gxp.o obj-$(CONFIG_CLKSRC_LOONGSON1_PWM) += timer-loongson1-pwm.o obj-$(CONFIG_EP93XX_TIMER) += timer-ep93xx.o obj-$(CONFIG_RALINK_TIMER) += timer-ralink.o +obj-$(CONFIG_NXP_STM_TIMER) += timer-nxp-stm.o + diff --git a/drivers/clocksource/timer-nxp-stm.c b/drivers/clocksource/timer-nxp-stm.c new file mode 100644 index 000000000000..b67e438487ae --- /dev/null +++ b/drivers/clocksource/timer-nxp-stm.c @@ -0,0 +1,524 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2016 Freescale Semiconductor, Inc. + * Copyright 2018,2021-2025 NXP + * + * NXP System Timer Module: + * + * STM supports commonly required system and application software + * timing functions. STM includes a 32-bit count-up timer and four + * 32-bit compare channels with a separate interrupt source for each + * channel. The timer is driven by the STM module clock divided by an + * 8-bit prescale value (1 to 256). 
It has ability to stop the timer + * in Debug mode + * + */ +#include <linux/clk.h> +#include <linux/clockchips.h> +#include <linux/cpuhotplug.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/sched_clock.h> + +/* + * Each stm has 4 channels which take 0x10 Bytes register space + */ +#define STM_CHANNEL(n) (0x10 * ((n) + 1)) + +#define STM_CCR 0x00 +#define STM_CCR_CEN BIT(0) + +#define STM_CIR 0x04 +#define STM_CIR_CIF BIT(0) + +#define STM_CMP 0x08 + +#define STM_CR 0x00 +#define STM_CR_TEN BIT(0) +#define STM_CR_FRZ BIT(1) +#define STM_CR_CPS_OFFSET 8u +#define STM_CR_CPS_MASK GENMASK(15, STM_CR_CPS_OFFSET) +#define STM_CR_CPS(x) (((x) << STM_CR_CPS_OFFSET) & STM_CR_CPS_MASK) + +#define STM_CNT 0x04 + +#define STM_ENABLE_MASK (STM_CR_FRZ | STM_CR_TEN) + +struct stm_clocksource { + struct clocksource cs; + int counter; +}; + +struct stm_clockevent { + struct clock_event_device ced; + unsigned long delta; +}; + +struct stm_timer { + void __iomem *base; + unsigned long rate; + union { + struct stm_clocksource scs; + struct stm_clockevent sce; + }; +}; + +static DEFINE_PER_CPU(struct stm_timer *, stm_timers); + +static struct stm_timer *stm_sched_clock; + +/** + * struct stm_instances - a set of counter for the STM initialized + * + * @clocksource: an integer giving the number of initialized clocksource + * @clockevent_per_cpu: an integer giving the number of initialized clockevent per cpu + * @clockevent_broadcast: an integer giving the number of initialized broadcast clockevent + * @features: a set of flag telling what kind of timer to initialize + */ +struct stm_instances { + int clocksource; + int clockevent_per_cpu; + int clockevent_broadcast; + int features; +}; + +#define STM_CLKSRC BIT(0) +#define STM_CLKEVT_PER_CPU BIT(1) +#define STM_CLKEVT_BROADCAST BIT(2) + +static struct stm_clocksource *cs_to_scs(struct 
clocksource *cs) +{ + return container_of(cs, struct stm_clocksource, cs); +} + +static struct stm_clockevent *ced_to_sced(struct clock_event_device *ced) +{ + return container_of(ced, struct stm_clockevent, ced); +} + +static struct stm_timer *cs_to_stm(struct clocksource *cs) +{ + struct stm_clocksource *scs = cs_to_scs(cs); + + return container_of(scs, struct stm_timer, scs); +} + +static struct stm_timer *ced_to_stm(struct clock_event_device *ced) +{ + struct stm_clockevent *sce = ced_to_sced(ced); + + return container_of(sce, struct stm_timer, sce); +} + +static u64 notrace nxp_stm_read_sched_clock(void) +{ + return readl(stm_sched_clock->base + STM_CNT); +} + +static u32 nxp_stm_clocksource_getcnt(struct stm_timer *stm_timer) +{ + return readl(stm_timer->base + STM_CNT); +} + +static void nxp_stm_clocksource_setcnt(struct stm_timer *stm_timer, u32 cnt) +{ + writel(cnt, stm_timer->base + STM_CNT); +} + +static u64 nxp_stm_clocksource_read(struct clocksource *cs) +{ + struct stm_timer *stm_timer = cs_to_stm(cs); + + return (u64)nxp_stm_clocksource_getcnt(stm_timer); +} + +static int nxp_stm_clocksource_enable(struct clocksource *cs) +{ + struct stm_timer *stm_timer = cs_to_stm(cs); + u32 reg; + + reg = readl(stm_timer->base + STM_CR); + reg &= ~STM_CR_CPS_MASK; + reg |= STM_ENABLE_MASK; + + writel(reg, stm_timer->base + STM_CR); + + return 0; +} + +static void nxp_stm_clocksource_disable(struct clocksource *cs) +{ + struct stm_timer *stm_timer = cs_to_stm(cs); + u32 reg; + + reg = readl(stm_timer->base + STM_CR); + reg &= ~(STM_CR_CPS_MASK | STM_ENABLE_MASK); + + writel(reg, stm_timer->base + STM_CR); +} + +static void nxp_stm_clocksource_suspend(struct clocksource *cs) +{ + struct stm_timer *stm_timer = cs_to_stm(cs); + + nxp_stm_clocksource_disable(cs); + stm_timer->scs.counter = nxp_stm_clocksource_getcnt(stm_timer); +} + +static void nxp_stm_clocksource_resume(struct clocksource *cs) +{ + struct stm_timer *stm_timer = cs_to_stm(cs); + + 
nxp_stm_clocksource_setcnt(stm_timer, stm_timer->scs.counter); + nxp_stm_clocksource_enable(cs); +} + +static int __init nxp_stm_clocksource_init(struct device *dev, const char *name, + void __iomem *base, struct clk *clk) +{ + struct stm_timer *stm_timer; + int ret; + + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); + if (!stm_timer) + return -ENOMEM; + + stm_timer->base = base; + stm_timer->rate = clk_get_rate(clk); + + stm_timer->scs.cs.name = name; + stm_timer->scs.cs.rating = 460; + stm_timer->scs.cs.read = nxp_stm_clocksource_read; + stm_timer->scs.cs.enable = nxp_stm_clocksource_enable; + stm_timer->scs.cs.disable = nxp_stm_clocksource_disable; + stm_timer->scs.cs.suspend = nxp_stm_clocksource_suspend; + stm_timer->scs.cs.resume = nxp_stm_clocksource_resume; + stm_timer->scs.cs.mask = CLOCKSOURCE_MASK(32); + stm_timer->scs.cs.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + ret = clocksource_register_hz(&stm_timer->scs.cs, stm_timer->rate); + if (ret) + return ret; + + stm_sched_clock = stm_timer; + + sched_clock_register(nxp_stm_read_sched_clock, 32, stm_timer->rate); + + dev_set_drvdata(dev, stm_timer); + + dev_dbg(dev, "Registered clocksource %s\n", name); + + return 0; +} + +static int nxp_stm_clockevent_read_counter(struct stm_timer *stm_timer) +{ + return readl(stm_timer->base + STM_CNT); +} + +static void nxp_stm_clockevent_disable(struct stm_timer *stm_timer) +{ + /* + * The counter is shared between channels and will continue to + * be incremented. If STM_CMP value is too small, the next event can + * be lost if we don't disable the entire module. + * Disabling the entire module, makes STM not suitable as clocksource. 
+ */ + writel(0, stm_timer->base + STM_CR); + writel(0, stm_timer->base + STM_CHANNEL(0) + STM_CCR); +} + +static void nxp_stm_clockevent_enable(struct stm_timer *stm_timer) +{ + u32 reg = readl(stm_timer->base + STM_CR); + + reg &= ~STM_CR_CPS_MASK; + reg |= STM_ENABLE_MASK; + + writel(reg, stm_timer->base + STM_CR); + writel(STM_CCR_CEN, stm_timer->base + STM_CHANNEL(0) + STM_CCR); +} + +static void nxp_stm_clockevent_irq_clr(struct stm_timer *stm_timer) +{ + /* Clear the interrupt */ + writel(STM_CIR_CIF, stm_timer->base + STM_CHANNEL(0) + STM_CIR); +} + +static void nxp_stm_clockevent_irq_ack(struct stm_timer *stm_timer) +{ + u32 val; + + nxp_stm_clockevent_irq_clr(stm_timer); + + /* + * Update STM_CMP value using the counter value + */ + val = nxp_stm_clockevent_read_counter(stm_timer) + stm_timer->sce.delta; + + writel(val, stm_timer->base + STM_CHANNEL(0) + STM_CMP); +} + +static irqreturn_t nxp_stm_clockevent_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *ced = dev_id; + struct stm_timer *stm_timer = ced_to_stm(ced); + + nxp_stm_clockevent_irq_ack(stm_timer); + + /* + * stm hardware doesn't support oneshot, it will generate an + * interrupt and start the counter again so software need to + * disable the timer to stop the counter loop in ONESHOT mode. 
+ */ + if (likely(clockevent_state_oneshot(ced))) + nxp_stm_clockevent_disable(stm_timer); + + ced->event_handler(ced); + + return IRQ_HANDLED; +} + +static int nxp_stm_clockevent_shutdown(struct clock_event_device *ced) +{ + struct stm_timer *stm_timer = ced_to_stm(ced); + + nxp_stm_clockevent_disable(stm_timer); + + return 0; +} + +static int nxp_stm_clockevent_set_next_event(unsigned long delta, struct clock_event_device *ced) +{ + struct stm_timer *stm_timer = ced_to_stm(ced); + u32 val; + + nxp_stm_clockevent_disable(stm_timer); + + stm_timer->sce.delta = delta; + + val = nxp_stm_clockevent_read_counter(stm_timer) + delta; + + writel(val, stm_timer->base + STM_CHANNEL(0) + STM_CMP); + + nxp_stm_clockevent_enable(stm_timer); + + return 0; +} + +static int nxp_stm_clockevent_set_periodic(struct clock_event_device *ced) +{ + struct stm_timer *stm_timer = ced_to_stm(ced); + + return nxp_stm_clockevent_set_next_event(stm_timer->rate, ced); +} + +static int __init nxp_stm_clockevent_broadcast_init(struct device *dev, const char *name, void __iomem *base, + int irq, struct clk *clk) +{ + struct stm_timer *stm_timer; + int ret; + + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); + if (!stm_timer) + return -ENOMEM; + + stm_timer->base = base; + stm_timer->rate = clk_get_rate(clk); + + stm_timer->sce.ced.name = name; + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + stm_timer->sce.ced.set_state_shutdown = nxp_stm_clockevent_shutdown; + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; + stm_timer->sce.ced.cpumask = cpu_possible_mask; + stm_timer->sce.ced.rating = 460; + stm_timer->sce.ced.irq = irq; + + nxp_stm_clockevent_irq_clr(stm_timer); + + ret = request_irq(irq, nxp_stm_clockevent_interrupt, + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); + if (ret) { + dev_err(dev, "Unable to allocate interrupt line: %d\n", 
ret); + return ret; + } + + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); + + dev_dbg(dev, "Registered broadcast clockevent %s irq=%d\n", name, irq); + + return 0; +} + +static int __init nxp_stm_clockevent_per_cpu_init(struct device *dev, const char *name, void __iomem *base, + int irq, struct clk *clk, int cpu) +{ + struct stm_timer *stm_timer; + int ret; + + stm_timer = devm_kzalloc(dev, sizeof(*stm_timer), GFP_KERNEL); + if (!stm_timer) + return -ENOMEM; + + stm_timer->base = base; + stm_timer->rate = clk_get_rate(clk); + + stm_timer->sce.ced.name = name; + stm_timer->sce.ced.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + stm_timer->sce.ced.set_state_shutdown = nxp_stm_clockevent_shutdown; + stm_timer->sce.ced.set_state_periodic = nxp_stm_clockevent_set_periodic; + stm_timer->sce.ced.set_next_event = nxp_stm_clockevent_set_next_event; + stm_timer->sce.ced.cpumask = cpumask_of(cpu); + stm_timer->sce.ced.rating = 460; + stm_timer->sce.ced.irq = irq; + + nxp_stm_clockevent_irq_clr(stm_timer); + + ret = request_irq(irq, nxp_stm_clockevent_interrupt, + IRQF_TIMER | IRQF_NOBALANCING, name, &stm_timer->sce.ced); + if (ret) { + dev_err(dev, "Unable to allocate interrupt line: %d\n", ret); + return ret; + } + + per_cpu(stm_timers, cpu) = stm_timer; + + dev_dbg(dev, "Initialized per cpu clockevent name=%s, irq=%d, cpu=%d\n", name, irq, cpu); + + return 0; +} + +static int nxp_stm_clockevent_starting_cpu(unsigned int cpu) +{ + struct stm_timer *stm_timer = per_cpu(stm_timers, cpu); + int ret; + + if (WARN_ON(!stm_timer)) + return -EFAULT; + + ret = irq_force_affinity(stm_timer->sce.ced.irq, cpumask_of(cpu)); + if (ret) + return ret; + + clockevents_config_and_register(&stm_timer->sce.ced, stm_timer->rate, 1, 0xffffffff); + + return 0; +} + +static int __init nxp_stm_timer_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct stm_instances 
*stm_instances; + const char *name = of_node_full_name(np); + void __iomem *base; + int irq, ret; + struct clk *clk; + + stm_instances = (typeof(stm_instances))of_device_get_match_data(dev); + if (!stm_instances) { + dev_err(dev, "No STM instances associated with a cpu"); + return -EINVAL; + } + + base = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(base)) { + dev_err(dev, "Failed to iomap %pOFn\n", np); + return PTR_ERR(base); + } + + irq = irq_of_parse_and_map(np, 0); + if (irq <= 0) { + dev_err(dev, "Failed to parse and map IRQ: %d\n", irq); + return -EINVAL; + } + + clk = devm_clk_get(dev, NULL); + if (IS_ERR(clk)) { + dev_err(dev, "Clock not found\n"); + return PTR_ERR(clk); + } + + ret = clk_prepare_enable(clk); + if (ret) { + dev_err(dev, "Failed to enable STM timer clock: %d\n", ret); + return ret; + } + + if (!stm_instances->clocksource && (stm_instances->features & STM_CLKSRC)) { + + /* + * First probed STM will be a clocksource + */ + ret = nxp_stm_clocksource_init(dev, name, base, clk); + if (ret) + return ret; + stm_instances->clocksource++; + + } else if (!stm_instances->clockevent_broadcast && + (stm_instances->features & STM_CLKEVT_BROADCAST)) { + + /* + * Second probed STM will be a broadcast clockevent + */ + ret = nxp_stm_clockevent_broadcast_init(dev, name, base, irq, clk); + if (ret) + return ret; + stm_instances->clockevent_broadcast++; + + } else if (stm_instances->clockevent_per_cpu < num_possible_cpus() && + (stm_instances->features & STM_CLKEVT_PER_CPU)) { + + /* + * Next probed STM will be a per CPU clockevent, until + * we probe as much as we have CPUs available on the + * system, we do a partial initialization + */ + ret = nxp_stm_clockevent_per_cpu_init(dev, name, base, irq, clk, + stm_instances->clockevent_per_cpu); + if (ret) + return ret; + + stm_instances->clockevent_per_cpu++; + + /* + * The number of probed STM for per CPU clockevent is + * equal to the number of available CPUs on the + * system. 
We install the cpu hotplug to finish the + * initialization by registering the clockevents + */ + if (stm_instances->clockevent_per_cpu == num_possible_cpus()) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "STM timer:starting", + nxp_stm_clockevent_starting_cpu, NULL); + if (ret < 0) + return ret; + } + } + + return 0; +} + +static struct stm_instances s32g_stm_instances = { .features = STM_CLKSRC | STM_CLKEVT_PER_CPU }; + +static const struct of_device_id nxp_stm_of_match[] = { + { .compatible = "nxp,s32g2-stm", &s32g_stm_instances }, + { } +}; +MODULE_DEVICE_TABLE(of, nxp_stm_of_match); + +static struct platform_driver nxp_stm_probe = { + .probe = nxp_stm_timer_probe, + .driver = { + .name = "nxp-stm", + .of_match_table = of_match_ptr(nxp_stm_of_match), + }, +}; +module_platform_driver(nxp_stm_probe); + +MODULE_DESCRIPTION("NXP System Timer Module driver"); +MODULE_LICENSE("GPL");