diff mbox

[2/2] ARM: shmobile: r8a73a4: implement CPU clock scaling for CPUFreq

Message ID Pine.LNX.4.64.1305221111210.12715@axis700.grange (mailing list archive)
State New, archived
Headers show

Commit Message

Guennadi Liakhovetski May 22, 2013, 9:12 a.m. UTC
This patch adds support for the Z-clock on r8a73a4 SoCs, which drives
the Cortex-A15 core, and registers a "cpufreq-cpu0" platform device. By
adding an "operating-points" property to the CPU0 DT node and a regulator,
platforms can then use the generic cpufreq-cpu0 driver to exploit the
SoC's DVFS capabilities.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski+renesas@gmail.com>
---
 arch/arm/mach-shmobile/Kconfig         |    2 +
 arch/arm/mach-shmobile/clock-r8a73a4.c |  123 +++++++++++++++++++++++++++++++-
 arch/arm/mach-shmobile/setup-r8a73a4.c |    1 +
 3 files changed, 125 insertions(+), 1 deletions(-)

Comments

Simon Horman June 27, 2013, 7:21 a.m. UTC | #1
On Wed, May 22, 2013 at 11:12:37AM +0200, Guennadi Liakhovetski wrote:
> This patch adds support for the Z-clock on r8a73a4 SoCs, which drives
> the Cortex-A15 core, and registers a "cpufreq-cpu0" platform device. By
> adding an "operating-points" property to the CPU0 DT node and a regulator,
> platforms can then use the generic cpufreq-cpu0 driver to exploit the
> SoC's DVFS capabilities.
> 
> Signed-off-by: Guennadi Liakhovetski <g.liakhovetski+renesas@gmail.com>

I have queued this up for v3.12 in the soc branch of the renesas tree.
I have included an Ack from Morimoto-san which he provided here:

"Re: [PATCH 0/4] Add DVFS support on APE6EVM"
http://www.spinics.net/lists/arm-kernel/msg255480.html

> ---
>  arch/arm/mach-shmobile/Kconfig         |    2 +
>  arch/arm/mach-shmobile/clock-r8a73a4.c |  123 +++++++++++++++++++++++++++++++-
>  arch/arm/mach-shmobile/setup-r8a73a4.c |    1 +
>  3 files changed, 125 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
> index 97bc6cf..ec349ad 100644
> --- a/arch/arm/mach-shmobile/Kconfig
> +++ b/arch/arm/mach-shmobile/Kconfig
> @@ -26,6 +26,8 @@ config ARCH_R8A73A4
>  	select ARM_ARCH_TIMER
>  	select SH_CLK_CPG
>  	select RENESAS_IRQC
> +	select ARCH_HAS_CPUFREQ
> +	select ARCH_HAS_OPP
>  
>  config ARCH_R8A7740
>  	bool "R-Mobile A1 (R8A77400)"
> diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c
> index d5176d0..824789c 100644
> --- a/arch/arm/mach-shmobile/clock-r8a73a4.c
> +++ b/arch/arm/mach-shmobile/clock-r8a73a4.c
> @@ -34,6 +34,7 @@
>  
>  #define FRQCRA		0xE6150000
>  #define FRQCRB		0xE6150004
> +#define FRQCRC		0xE61500E0
>  #define VCLKCR1		0xE6150008
>  #define VCLKCR2		0xE615000C
>  #define VCLKCR3		0xE615001C
> @@ -52,6 +53,7 @@
>  #define HSICKCR		0xE615026C
>  #define M4CKCR		0xE6150098
>  #define PLLECR		0xE61500D0
> +#define PLL0CR		0xE61500D8
>  #define PLL1CR		0xE6150028
>  #define PLL2CR		0xE615002C
>  #define PLL2SCR		0xE61501F4
> @@ -177,6 +179,7 @@ static struct sh_clk_ops pll_clk_ops = {
>  		.mapping	= &cpg_mapping,		\
>  	}
>  
> +PLL_CLOCK(pll0_clk,  &main_clk,      pll_parent_main,      1, 20, PLL0CR,  0);
>  PLL_CLOCK(pll1_clk,  &main_clk,      pll_parent_main,       1, 7, PLL1CR,  1);
>  PLL_CLOCK(pll2_clk,  &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2CR,  2);
>  PLL_CLOCK(pll2s_clk, &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2SCR, 4);
> @@ -184,6 +187,14 @@ PLL_CLOCK(pll2h_clk, &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2HCR, 5);
>  
>  SH_FIXED_RATIO_CLK(pll1_div2_clk,	pll1_clk,	div2);
>  
> +static atomic_t frqcr_lock;
> +
> +/* Several clocks need to access FRQCRB, so accesses have to be locked */
> +static bool frqcr_kick_check(struct clk *clk)
> +{
> +	return !(ioread32(CPG_MAP(FRQCRB)) & BIT(31));
> +}
> +
>  static int frqcr_kick_do(struct clk *clk)
>  {
>  	int i;
> @@ -199,6 +210,107 @@ static int frqcr_kick_do(struct clk *clk)
>  	return -ETIMEDOUT;
>  }
>  
> +static int zclk_set_rate(struct clk *clk, unsigned long rate)
> +{
> +	void __iomem *frqcrc;
> +	int ret;
> +	unsigned long step, p_rate;
> +	u32 val;
> +
> +	if (!clk->parent || !__clk_get(clk->parent))
> +		return -ENODEV;
> +
> +	if (!atomic_inc_and_test(&frqcr_lock) || !frqcr_kick_check(clk)) {
> +		ret = -EBUSY;
> +		goto done;
> +	}
> +
> +	frqcrc = clk->mapped_reg + (FRQCRC - (u32)clk->enable_reg);
> +
> +	p_rate = clk_get_rate(clk->parent);
> +	if (rate == p_rate) {
> +		val = 0;
> +	} else {
> +		step = DIV_ROUND_CLOSEST(p_rate, 32);
> +		val = 32 - rate / step;
> +	}
> +
> +	iowrite32((ioread32(frqcrc) & ~(clk->div_mask << clk->enable_bit)) |
> +		  (val << clk->enable_bit), frqcrc);
> +
> +	ret = frqcr_kick_do(clk);
> +
> +done:
> +	atomic_dec(&frqcr_lock);
> +	__clk_put(clk->parent);
> +	return ret;
> +}
> +
> +static long zclk_round_rate(struct clk *clk, unsigned long rate)
> +{
> +	/*
> +	 * theoretical rate = parent rate * multiplier / 32,
> +	 * where 1 <= multiplier <= 32. Therefore we should do
> +	 * multiplier = rate * 32 / parent rate
> +	 * rounded rate = parent rate * multiplier / 32.
> +	 * However, multiplication before division won't fit in 32 bits, so
> +	 * we sacrifice some precision by first dividing and then multiplying.
> +	 * To find the nearest multiplier we try both the rounded-down and the
> +	 * rounded-up one and pick the closer. This avoids 64-bit arithmetic.
> +	 */
> +	unsigned long step, mul_min, mul_max, rate_min, rate_max;
> +
> +	rate_max = clk_get_rate(clk->parent);
> +
> +	/* output freq <= parent */
> +	if (rate >= rate_max)
> +		return rate_max;
> +
> +	step = DIV_ROUND_CLOSEST(rate_max, 32);
> +	/* output freq >= parent / 32 */
> +	if (step >= rate)
> +		return step;
> +
> +	mul_min = rate / step;
> +	mul_max = DIV_ROUND_UP(rate, step);
> +	rate_min = step * mul_min;
> +	if (mul_max == mul_min)
> +		return rate_min;
> +
> +	rate_max = step * mul_max;
> +
> +	if (rate_max - rate <  rate - rate_min)
> +		return rate_max;
> +
> +	return rate_min;
> +}
> +
> +static unsigned long zclk_recalc(struct clk *clk)
> +{
> +	void __iomem *frqcrc = FRQCRC - (u32)clk->enable_reg + clk->mapped_reg;
> +	unsigned int max = clk->div_mask + 1;
> +	unsigned long val = ((ioread32(frqcrc) >> clk->enable_bit) &
> +			     clk->div_mask);
> +
> +	return DIV_ROUND_CLOSEST(clk_get_rate(clk->parent), max) *
> +		(max - val);
> +}
> +
> +static struct sh_clk_ops zclk_ops = {
> +	.recalc = zclk_recalc,
> +	.set_rate = zclk_set_rate,
> +	.round_rate = zclk_round_rate,
> +};
> +
> +static struct clk z_clk = {
> +	.parent = &pll0_clk,
> +	.div_mask = 0x1f,
> +	.enable_bit = 8,
> +	/* We'll need to access FRQCRB and FRQCRC */
> +	.enable_reg = (void __iomem *)FRQCRB,
> +	.ops = &zclk_ops,
> +};
> +
>  static struct clk *main_clks[] = {
>  	&extalr_clk,
>  	&extal1_clk,
> @@ -210,17 +322,21 @@ static struct clk *main_clks[] = {
>  	&main_div2_clk,
>  	&fsiack_clk,
>  	&fsibck_clk,
> +	&pll0_clk,
>  	&pll1_clk,
>  	&pll1_div2_clk,
>  	&pll2_clk,
>  	&pll2s_clk,
>  	&pll2h_clk,
> +	&z_clk,
>  };
>  
>  /* DIV4 */
>  static void div4_kick(struct clk *clk)
>  {
> -	frqcr_kick_do(clk);
> +	if (!WARN(!atomic_inc_and_test(&frqcr_lock), "FRQCR* lock broken!\n"))
> +		frqcr_kick_do(clk);
> +	atomic_dec(&frqcr_lock);
>  }
>  
>  static int divisors[] = { 2, 3, 4, 6, 8, 12, 16, 18, 24, 0, 36, 48, 10};
> @@ -396,6 +512,9 @@ static struct clk_lookup lookups[] = {
>  	CLKDEV_CON_ID("pll2s",			&pll2s_clk),
>  	CLKDEV_CON_ID("pll2h",			&pll2h_clk),
>  
> +	/* CPU clock */
> +	CLKDEV_DEV_ID("cpufreq-cpu0",		&z_clk),
> +
>  	/* DIV6 */
>  	CLKDEV_CON_ID("zb",			&div6_clks[DIV6_ZB]),
>  	CLKDEV_CON_ID("vck1",			&div6_clks[DIV6_VCK1]),
> @@ -439,6 +558,8 @@ void __init r8a73a4_clock_init(void)
>  	int k, ret = 0;
>  	u32 ckscr;
>  
> +	atomic_set(&frqcr_lock, -1);
> +
>  	reg = ioremap_nocache(CKSCR, PAGE_SIZE);
>  	BUG_ON(!reg);
>  	ckscr = ioread32(reg);
> diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c
> index c5a75a7..7265e14 100644
> --- a/arch/arm/mach-shmobile/setup-r8a73a4.c
> +++ b/arch/arm/mach-shmobile/setup-r8a73a4.c
> @@ -185,6 +185,7 @@ void __init r8a73a4_add_standard_devices(void)
>  #ifdef CONFIG_USE_OF
>  void __init r8a73a4_add_standard_devices_dt(void)
>  {
> +	platform_device_register_simple("cpufreq-cpu0", -1, NULL, 0);
>  	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
>  }
>  
> -- 
> 1.7.2.5
>
diff mbox

Patch

diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 97bc6cf..ec349ad 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -26,6 +26,8 @@  config ARCH_R8A73A4
 	select ARM_ARCH_TIMER
 	select SH_CLK_CPG
 	select RENESAS_IRQC
+	select ARCH_HAS_CPUFREQ
+	select ARCH_HAS_OPP
 
 config ARCH_R8A7740
 	bool "R-Mobile A1 (R8A77400)"
diff --git a/arch/arm/mach-shmobile/clock-r8a73a4.c b/arch/arm/mach-shmobile/clock-r8a73a4.c
index d5176d0..824789c 100644
--- a/arch/arm/mach-shmobile/clock-r8a73a4.c
+++ b/arch/arm/mach-shmobile/clock-r8a73a4.c
@@ -34,6 +34,7 @@ 
 
 #define FRQCRA		0xE6150000
 #define FRQCRB		0xE6150004
+#define FRQCRC		0xE61500E0
 #define VCLKCR1		0xE6150008
 #define VCLKCR2		0xE615000C
 #define VCLKCR3		0xE615001C
@@ -52,6 +53,7 @@ 
 #define HSICKCR		0xE615026C
 #define M4CKCR		0xE6150098
 #define PLLECR		0xE61500D0
+#define PLL0CR		0xE61500D8
 #define PLL1CR		0xE6150028
 #define PLL2CR		0xE615002C
 #define PLL2SCR		0xE61501F4
@@ -177,6 +179,7 @@  static struct sh_clk_ops pll_clk_ops = {
 		.mapping	= &cpg_mapping,		\
 	}
 
+PLL_CLOCK(pll0_clk,  &main_clk,      pll_parent_main,      1, 20, PLL0CR,  0);
 PLL_CLOCK(pll1_clk,  &main_clk,      pll_parent_main,       1, 7, PLL1CR,  1);
 PLL_CLOCK(pll2_clk,  &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2CR,  2);
 PLL_CLOCK(pll2s_clk, &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2SCR, 4);
@@ -184,6 +187,14 @@  PLL_CLOCK(pll2h_clk, &main_div2_clk, pll_parent_main_extal, 3, 5, PLL2HCR, 5);
 
 SH_FIXED_RATIO_CLK(pll1_div2_clk,	pll1_clk,	div2);
 
+static atomic_t frqcr_lock;
+
+/* Several clocks need to access FRQCRB, so accesses have to be locked */
+static bool frqcr_kick_check(struct clk *clk)
+{
+	return !(ioread32(CPG_MAP(FRQCRB)) & BIT(31));
+}
+
 static int frqcr_kick_do(struct clk *clk)
 {
 	int i;
@@ -199,6 +210,107 @@  static int frqcr_kick_do(struct clk *clk)
 	return -ETIMEDOUT;
 }
 
+static int zclk_set_rate(struct clk *clk, unsigned long rate)
+{
+	void __iomem *frqcrc;
+	int ret;
+	unsigned long step, p_rate;
+	u32 val;
+
+	if (!clk->parent || !__clk_get(clk->parent))
+		return -ENODEV;
+
+	if (!atomic_inc_and_test(&frqcr_lock) || !frqcr_kick_check(clk)) {
+		ret = -EBUSY;
+		goto done;
+	}
+
+	frqcrc = clk->mapped_reg + (FRQCRC - (u32)clk->enable_reg);
+
+	p_rate = clk_get_rate(clk->parent);
+	if (rate == p_rate) {
+		val = 0;
+	} else {
+		step = DIV_ROUND_CLOSEST(p_rate, 32);
+		val = 32 - rate / step;
+	}
+
+	iowrite32((ioread32(frqcrc) & ~(clk->div_mask << clk->enable_bit)) |
+		  (val << clk->enable_bit), frqcrc);
+
+	ret = frqcr_kick_do(clk);
+
+done:
+	atomic_dec(&frqcr_lock);
+	__clk_put(clk->parent);
+	return ret;
+}
+
+static long zclk_round_rate(struct clk *clk, unsigned long rate)
+{
+	/*
+	 * theoretical rate = parent rate * multiplier / 32,
+	 * where 1 <= multiplier <= 32. Therefore we should do
+	 * multiplier = rate * 32 / parent rate
+	 * rounded rate = parent rate * multiplier / 32.
+	 * However, multiplication before division won't fit in 32 bits, so
+	 * we sacrifice some precision by first dividing and then multiplying.
+	 * To find the nearest multiplier we try both the rounded-down and the
+	 * rounded-up one and pick the closer. This avoids 64-bit arithmetic.
+	 */
+	unsigned long step, mul_min, mul_max, rate_min, rate_max;
+
+	rate_max = clk_get_rate(clk->parent);
+
+	/* output freq <= parent */
+	if (rate >= rate_max)
+		return rate_max;
+
+	step = DIV_ROUND_CLOSEST(rate_max, 32);
+	/* output freq >= parent / 32 */
+	if (step >= rate)
+		return step;
+
+	mul_min = rate / step;
+	mul_max = DIV_ROUND_UP(rate, step);
+	rate_min = step * mul_min;
+	if (mul_max == mul_min)
+		return rate_min;
+
+	rate_max = step * mul_max;
+
+	if (rate_max - rate <  rate - rate_min)
+		return rate_max;
+
+	return rate_min;
+}
+
+static unsigned long zclk_recalc(struct clk *clk)
+{
+	void __iomem *frqcrc = FRQCRC - (u32)clk->enable_reg + clk->mapped_reg;
+	unsigned int max = clk->div_mask + 1;
+	unsigned long val = ((ioread32(frqcrc) >> clk->enable_bit) &
+			     clk->div_mask);
+
+	return DIV_ROUND_CLOSEST(clk_get_rate(clk->parent), max) *
+		(max - val);
+}
+
+static struct sh_clk_ops zclk_ops = {
+	.recalc = zclk_recalc,
+	.set_rate = zclk_set_rate,
+	.round_rate = zclk_round_rate,
+};
+
+static struct clk z_clk = {
+	.parent = &pll0_clk,
+	.div_mask = 0x1f,
+	.enable_bit = 8,
+	/* We'll need to access FRQCRB and FRQCRC */
+	.enable_reg = (void __iomem *)FRQCRB,
+	.ops = &zclk_ops,
+};
+
 static struct clk *main_clks[] = {
 	&extalr_clk,
 	&extal1_clk,
@@ -210,17 +322,21 @@  static struct clk *main_clks[] = {
 	&main_div2_clk,
 	&fsiack_clk,
 	&fsibck_clk,
+	&pll0_clk,
 	&pll1_clk,
 	&pll1_div2_clk,
 	&pll2_clk,
 	&pll2s_clk,
 	&pll2h_clk,
+	&z_clk,
 };
 
 /* DIV4 */
 static void div4_kick(struct clk *clk)
 {
-	frqcr_kick_do(clk);
+	if (!WARN(!atomic_inc_and_test(&frqcr_lock), "FRQCR* lock broken!\n"))
+		frqcr_kick_do(clk);
+	atomic_dec(&frqcr_lock);
 }
 
 static int divisors[] = { 2, 3, 4, 6, 8, 12, 16, 18, 24, 0, 36, 48, 10};
@@ -396,6 +512,9 @@  static struct clk_lookup lookups[] = {
 	CLKDEV_CON_ID("pll2s",			&pll2s_clk),
 	CLKDEV_CON_ID("pll2h",			&pll2h_clk),
 
+	/* CPU clock */
+	CLKDEV_DEV_ID("cpufreq-cpu0",		&z_clk),
+
 	/* DIV6 */
 	CLKDEV_CON_ID("zb",			&div6_clks[DIV6_ZB]),
 	CLKDEV_CON_ID("vck1",			&div6_clks[DIV6_VCK1]),
@@ -439,6 +558,8 @@  void __init r8a73a4_clock_init(void)
 	int k, ret = 0;
 	u32 ckscr;
 
+	atomic_set(&frqcr_lock, -1);
+
 	reg = ioremap_nocache(CKSCR, PAGE_SIZE);
 	BUG_ON(!reg);
 	ckscr = ioread32(reg);
diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c
index c5a75a7..7265e14 100644
--- a/arch/arm/mach-shmobile/setup-r8a73a4.c
+++ b/arch/arm/mach-shmobile/setup-r8a73a4.c
@@ -185,6 +185,7 @@  void __init r8a73a4_add_standard_devices(void)
 #ifdef CONFIG_USE_OF
 void __init r8a73a4_add_standard_devices_dt(void)
 {
+	platform_device_register_simple("cpufreq-cpu0", -1, NULL, 0);
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }