diff mbox

drm/pl111: Register the clock divider and use it.

Message ID 20170424194527.19426-1-eric@anholt.net (mailing list archive)
State New, archived
Headers show

Commit Message

Eric Anholt April 24, 2017, 7:45 p.m. UTC
This is required for the panel to work on bcm911360, where CLCDCLK is
the fixed 200Mhz AXI41 clock.  The rate set is still passed up to the
CLCDCLK, for platforms that have a settable rate on that one.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/pl111/pl111_display.c | 161 ++++++++++++++++++++++++++++++----
 drivers/gpu/drm/pl111/pl111_drm.h     |   8 ++
 drivers/gpu/drm/pl111/pl111_drv.c     |  11 +--
 include/linux/amba/clcd-regs.h        |   5 ++
 4 files changed, 161 insertions(+), 24 deletions(-)

Comments

Linus Walleij April 26, 2017, 2:23 p.m. UTC | #1
On Mon, Apr 24, 2017 at 9:45 PM, Eric Anholt <eric@anholt.net> wrote:

> This is required for the panel to work on bcm911360, where CLCDCLK is
> the fixed 200Mhz AXI41 clock.  The rate set is still passed up to the
> CLCDCLK, for platforms that have a settable rate on that one.
>
> Signed-off-by: Eric Anholt <eric@anholt.net>

I like this, it is pretty.

> +static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate,
> +                                   unsigned long *prate, bool set_parent)
> +{
> +       int best_div = 1, div;
> +       struct clk_hw *parent = clk_hw_get_parent(hw);
> +       unsigned long best_prate = 0;
> +       unsigned long best_diff = ~0ul;
> +       int max_div = (1 << (TIM2_PCD_LO_BITS + TIM2_PCD_HI_BITS)) - 1;
> +
> +       for (div = 1; div < max_div; div++) {
> +               unsigned long this_prate, div_rate, diff;
> +
> +               if (set_parent)
> +                       this_prate = clk_hw_round_rate(parent, rate * div);
> +               else
> +                       this_prate = *prate;
> +               div_rate = DIV_ROUND_UP_ULL(this_prate, div);
> +               diff = abs(rate - div_rate);
> +
> +               if (diff < best_diff) {
> +                       best_div = div;
> +                       best_diff = diff;
> +                       best_prate = this_prate;
> +               }
> +       }
> +
> +       *prate = best_prate;
> +       return best_div;
> +}

So since this will choose a divisor using clk_hw_round_rate() on the parent
if we can set the rate of the parent...

> +static long pl111_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
> +                                    unsigned long *prate)
> +{
> +       int div = pl111_clk_div_choose_div(hw, rate, prate, true);

...which we seem to assume that we can, actually why do you pass
this parameter set_parent at all? It is always true in this code.

You should not need to know whether we can set the rate of the
parent or not: clk_hw_round_rate() will simply return the fixed
rate anyway.

And if the divider can make the set rate even more precise:
all the better!

So I think the parameter can go.

> +static int pl111_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
> +                                 unsigned long prate)
> +{
> +       struct pl111_drm_dev_private *priv =
> +               container_of(hw, struct pl111_drm_dev_private, clk_div);
> +       int div = pl111_clk_div_choose_div(hw, rate, &prate, false);
> +       u32 tim2;
> +
> +       spin_lock(&priv->tim2_lock);
> +       tim2 = readl(priv->regs + CLCD_TIM2);
> +       tim2 &= ~(TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
> +
> +       if (div == 1) {
> +               tim2 |= TIM2_BCD;
> +       } else {
> +               div -= 2;
> +               tim2 |= div & TIM2_PCD_LO_MASK;
> +               tim2 |= (div >> TIM2_PCD_LO_BITS) << TIM2_PCD_HI_SHIFT;
> +       }
> +
> +       writel(tim2, priv->regs + CLCD_TIM2);
> +       spin_unlock(&priv->tim2_lock);
> +
> +       return 0;
> +}

So this will write the divisor, which is nice. But what if we also need
to change the rate of the parent?

> +static int
> +pl111_init_clock_divider(struct drm_device *drm)
> +{
> +       struct pl111_drm_dev_private *priv = drm->dev_private;
> +       struct clk *parent = devm_clk_get(drm->dev, "clcdclk");
> +       struct clk_hw *div = &priv->clk_div;
> +       const char *parent_name;
> +       struct clk_init_data init = {
> +               .name = "pl111_div",
> +               .ops = &pl111_clk_div_ops,
> +               .parent_names = &parent_name,
> +               .num_parents = 1,
> +       };

I think it is necessary to set .flags in the init data to
.flags = CLK_SET_RATE_PARENT,
for this code to work with a parent that can change rate.

>         void *regs;
> +       /* The pixel clock (a reference to our clock divider off of CLCDCLK). */
>         struct clk *clk;

Maybe we should simply rename it pixclk so it is clear what it's for,
but that can be a separate patch.

> - * - Use the internal clock divisor to reduce power consumption by
> - *   using HCLK (apb_pclk) when appropriate.
> + * - Use the CLKSEL bit to support switching between the two external
> + *   clock parents.

OK so that remains to be done. We discussed this previously
so I got a bit confused. The divisor code seems fine, after this
we only need some more code to choose the best parent for
the divided clock.

It seems that what would pe Perfect(TM) would be to calculate the
best end result using clock A and the best end result using clock B,
both utilizing the divisor, and then choose the best of those two.

I think struct clk_mux is supposed to do that so it would eventually
be:

CLK A -|\_ clk_mux --> clk_divider --> pixel clock
CLK B -|/

Yours,
Linus Walleij
Eric Anholt April 26, 2017, 4:54 p.m. UTC | #2
Linus Walleij <linus.walleij@linaro.org> writes:

> On Mon, Apr 24, 2017 at 9:45 PM, Eric Anholt <eric@anholt.net> wrote:
>> +static long pl111_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
>> +                                    unsigned long *prate)
>> +{
>> +       int div = pl111_clk_div_choose_div(hw, rate, prate, true);
>
> ...which we seem to assume that we can, actually why do you pass
> this parameter set_parent at all? It is always true in this code.

Because the other caller just below passes false: When we're being asked
to set_rate, the parent rate has been set by the core and we just need
to choose our best divider given that.

>> +static int pl111_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
>> +                                 unsigned long prate)
>> +{
>> +       struct pl111_drm_dev_private *priv =
>> +               container_of(hw, struct pl111_drm_dev_private, clk_div);
>> +       int div = pl111_clk_div_choose_div(hw, rate, &prate, false);
>> +       u32 tim2;
>> +
>> +       spin_lock(&priv->tim2_lock);
>> +       tim2 = readl(priv->regs + CLCD_TIM2);
>> +       tim2 &= ~(TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
>> +
>> +       if (div == 1) {
>> +               tim2 |= TIM2_BCD;
>> +       } else {
>> +               div -= 2;
>> +               tim2 |= div & TIM2_PCD_LO_MASK;
>> +               tim2 |= (div >> TIM2_PCD_LO_BITS) << TIM2_PCD_HI_SHIFT;
>> +       }
>> +
>> +       writel(tim2, priv->regs + CLCD_TIM2);
>> +       spin_unlock(&priv->tim2_lock);
>> +
>> +       return 0;
>> +}
>
> So this will write the divisor, which is nice. But what if we also need
> to change the rate of the parent?

The clk core will have already done that.

>> +static int
>> +pl111_init_clock_divider(struct drm_device *drm)
>> +{
>> +       struct pl111_drm_dev_private *priv = drm->dev_private;
>> +       struct clk *parent = devm_clk_get(drm->dev, "clcdclk");
>> +       struct clk_hw *div = &priv->clk_div;
>> +       const char *parent_name;
>> +       struct clk_init_data init = {
>> +               .name = "pl111_div",
>> +               .ops = &pl111_clk_div_ops,
>> +               .parent_names = &parent_name,
>> +               .num_parents = 1,
>> +       };
>
> I think it is necessary to set .flags in the init data to
> .flags = CLK_SET_RATE_PARENT,
> for this code to work with a parent that can change rate.

I was thinking this flag was used internally in things like
clk-divider.c, but the core uses it too.  I'll fix that, thanks!

>> - * - Use the internal clock divisor to reduce power consumption by
>> - *   using HCLK (apb_pclk) when appropriate.
>> + * - Use the CLKSEL bit to support switching between the two external
>> + *   clock parents.
>
> OK so that remains to be done. We discussed this previously
> so I got a bit confused. The divisor code seems fine, after this
> we only need some more code to choose the best parent for
> the divided clock.
>
> It seems that what would pe Perfect(TM) would be to calculate the
> best end result using clock A and the best end result using clock B,
> both utilizing the divisor, and then choose the best of those two.
>
> I think struct clk_mux is supposed to do that so it would eventually
> be:
>
> CLK A -|\_ clk_mux --> clk_divider --> pixel clock
> CLK B -|/

I agree.  This patch got things going for this platform, without needing
the bindings change (and helped confirm for me that I do understand the
platform design correctly).
Eric Anholt April 26, 2017, 5:01 p.m. UTC | #3
Linus Walleij <linus.walleij@linaro.org> writes:

> On Mon, Apr 24, 2017 at 9:45 PM, Eric Anholt <eric@anholt.net> wrote:
>
>> This is required for the panel to work on bcm911360, where CLCDCLK is
>> the fixed 200Mhz AXI41 clock.  The rate set is still passed up to the
>> CLCDCLK, for platforms that have a settable rate on that one.
>>
>> Signed-off-by: Eric Anholt <eric@anholt.net>
>
> I like this, it is pretty.

An aside, for anyone else considering common clk and thinking "this is a
bit more complicated than I need": It's really nice for platform
debugging to have your divider or mux or whatever show up in
/debug/clk/clk_summary.  I don't know how many times I've been saved by
diffing that file between a good and bad state of my platform.
diff mbox

Patch

diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c
index 39a5c33bce7d..cf674d57465f 100644
--- a/drivers/gpu/drm/pl111/pl111_display.c
+++ b/drivers/gpu/drm/pl111/pl111_display.c
@@ -108,7 +108,7 @@  static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
 	u32 cntl;
 	u32 ppl, hsw, hfp, hbp;
 	u32 lpp, vsw, vfp, vbp;
-	u32 cpl;
+	u32 cpl, tim2;
 	int ret;
 
 	ret = clk_set_rate(priv->clk, mode->clock * 1000);
@@ -142,20 +142,28 @@  static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
 	       (vfp << 16) |
 	       (vbp << 24),
 	       priv->regs + CLCD_TIM1);
-	/* XXX: We currently always use CLCDCLK with no divisor.  We
-	 * could probably reduce power consumption by using HCLK
-	 * (apb_pclk) with a divisor when it gets us near our target
-	 * pixel clock.
-	 */
-	writel(((mode->flags & DRM_MODE_FLAG_NHSYNC) ? TIM2_IHS : 0) |
-	       ((mode->flags & DRM_MODE_FLAG_NVSYNC) ? TIM2_IVS : 0) |
-	       ((connector->display_info.bus_flags &
-		 DRM_BUS_FLAG_DE_LOW) ? TIM2_IOE : 0) |
-	       ((connector->display_info.bus_flags &
-		 DRM_BUS_FLAG_PIXDATA_NEGEDGE) ? TIM2_IPC : 0) |
-	       TIM2_BCD |
-	       (cpl << 16),
-	       priv->regs + CLCD_TIM2);
+
+	spin_lock(&priv->tim2_lock);
+
+	tim2 = readl(priv->regs + CLCD_TIM2);
+	tim2 &= (TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		tim2 |= TIM2_IHS;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		tim2 |= TIM2_IVS;
+
+	if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW)
+		tim2 |= TIM2_IOE;
+
+	if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+		tim2 |= TIM2_IPC;
+
+	tim2 |= cpl << 16;
+	writel(tim2, priv->regs + CLCD_TIM2);
+	spin_unlock(&priv->tim2_lock);
+
 	writel(0, priv->regs + CLCD_TIM3);
 
 	drm_panel_prepare(priv->connector.panel);
@@ -288,6 +296,125 @@  const struct drm_simple_display_pipe_funcs pl111_display_funcs = {
 	.prepare_fb = pl111_display_prepare_fb,
 };
 
+static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate,
+				    unsigned long *prate, bool set_parent)
+{
+	int best_div = 1, div;
+	struct clk_hw *parent = clk_hw_get_parent(hw);
+	unsigned long best_prate = 0;
+	unsigned long best_diff = ~0ul;
+	int max_div = (1 << (TIM2_PCD_LO_BITS + TIM2_PCD_HI_BITS)) - 1;
+
+	for (div = 1; div < max_div; div++) {
+		unsigned long this_prate, div_rate, diff;
+
+		if (set_parent)
+			this_prate = clk_hw_round_rate(parent, rate * div);
+		else
+			this_prate = *prate;
+		div_rate = DIV_ROUND_UP_ULL(this_prate, div);
+		diff = abs(rate - div_rate);
+
+		if (diff < best_diff) {
+			best_div = div;
+			best_diff = diff;
+			best_prate = this_prate;
+		}
+	}
+
+	*prate = best_prate;
+	return best_div;
+}
+
+static long pl111_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long *prate)
+{
+	int div = pl111_clk_div_choose_div(hw, rate, prate, true);
+
+	return DIV_ROUND_UP_ULL(*prate, div);
+}
+
+static unsigned long pl111_clk_div_recalc_rate(struct clk_hw *hw,
+					       unsigned long prate)
+{
+	struct pl111_drm_dev_private *priv =
+		container_of(hw, struct pl111_drm_dev_private, clk_div);
+	u32 tim2 = readl(priv->regs + CLCD_TIM2);
+	int div;
+
+	if (tim2 & TIM2_BCD)
+		return prate;
+
+	div = tim2 & TIM2_PCD_LO_MASK;
+	div |= (tim2 & TIM2_PCD_HI_MASK) >>
+		(TIM2_PCD_HI_SHIFT - TIM2_PCD_LO_BITS);
+	div += 2;
+
+	return DIV_ROUND_UP_ULL(prate, div);
+}
+
+static int pl111_clk_div_set_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long prate)
+{
+	struct pl111_drm_dev_private *priv =
+		container_of(hw, struct pl111_drm_dev_private, clk_div);
+	int div = pl111_clk_div_choose_div(hw, rate, &prate, false);
+	u32 tim2;
+
+	spin_lock(&priv->tim2_lock);
+	tim2 = readl(priv->regs + CLCD_TIM2);
+	tim2 &= ~(TIM2_BCD | TIM2_PCD_LO_MASK | TIM2_PCD_HI_MASK);
+
+	if (div == 1) {
+		tim2 |= TIM2_BCD;
+	} else {
+		div -= 2;
+		tim2 |= div & TIM2_PCD_LO_MASK;
+		tim2 |= (div >> TIM2_PCD_LO_BITS) << TIM2_PCD_HI_SHIFT;
+	}
+
+	writel(tim2, priv->regs + CLCD_TIM2);
+	spin_unlock(&priv->tim2_lock);
+
+	return 0;
+}
+
+const struct clk_ops pl111_clk_div_ops = {
+	.recalc_rate = pl111_clk_div_recalc_rate,
+	.round_rate = pl111_clk_div_round_rate,
+	.set_rate = pl111_clk_div_set_rate,
+};
+
+static int
+pl111_init_clock_divider(struct drm_device *drm)
+{
+	struct pl111_drm_dev_private *priv = drm->dev_private;
+	struct clk *parent = devm_clk_get(drm->dev, "clcdclk");
+	struct clk_hw *div = &priv->clk_div;
+	const char *parent_name;
+	struct clk_init_data init = {
+		.name = "pl111_div",
+		.ops = &pl111_clk_div_ops,
+		.parent_names = &parent_name,
+		.num_parents = 1,
+	};
+	int ret;
+
+	if (IS_ERR(parent)) {
+		dev_err(drm->dev, "CLCD: unable to get clcdclk.\n");
+		return PTR_ERR(parent);
+	}
+	parent_name = __clk_get_name(parent);
+
+	spin_lock_init(&priv->tim2_lock);
+	div->init = &init;
+
+	ret = devm_clk_hw_register(drm->dev, div);
+
+	priv->clk = div->clk;
+	return ret;
+}
+
 int pl111_display_init(struct drm_device *drm)
 {
 	struct pl111_drm_dev_private *priv = drm->dev_private;
@@ -333,6 +460,10 @@  int pl111_display_init(struct drm_device *drm)
 		return -EINVAL;
 	}
 
+	ret = pl111_init_clock_divider(drm);
+	if (ret)
+		return ret;
+
 	ret = drm_simple_display_pipe_init(drm, &priv->pipe,
 					   &pl111_display_funcs,
 					   formats, ARRAY_SIZE(formats),
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index f381593921b7..4162c6aa5dbb 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -21,6 +21,7 @@ 
 
 #include <drm/drm_gem.h>
 #include <drm/drm_simple_kms_helper.h>
+#include <linux/clk-provider.h>
 
 #define CLCD_IRQ_NEXTBASE_UPDATE BIT(2)
 
@@ -37,7 +38,14 @@  struct pl111_drm_dev_private {
 	struct drm_fbdev_cma *fbdev;
 
 	void *regs;
+	/* The pixel clock (a reference to our clock divider off of CLCDCLK). */
 	struct clk *clk;
+	/* pl111's internal clock divider. */
+	struct clk_hw clk_div;
+	/* Lock to sync access to CLCD_TIM2 between the common clock
+	 * subsystem and pl111_display_enable().
+	 */
+	spinlock_t tim2_lock;
 };
 
 #define to_pl111_connector(x) \
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 936403f65508..9d1467492cb9 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -50,8 +50,8 @@ 
  * - Read back hardware state at boot to skip reprogramming the
  *   hardware when doing a no-op modeset.
  *
- * - Use the internal clock divisor to reduce power consumption by
- *   using HCLK (apb_pclk) when appropriate.
+ * - Use the CLKSEL bit to support switching between the two external
+ *   clock parents.
  */
 
 #include <linux/amba/bus.h>
@@ -195,13 +195,6 @@  static int pl111_amba_probe(struct amba_device *amba_dev,
 	priv->drm = drm;
 	drm->dev_private = priv;
 
-	priv->clk = devm_clk_get(dev, "clcdclk");
-	if (IS_ERR(priv->clk)) {
-		dev_err(dev, "CLCD: unable to get clk.\n");
-		ret = PTR_ERR(priv->clk);
-		goto dev_unref;
-	}
-
 	priv->regs = devm_ioremap_resource(dev, &amba_dev->res);
 	if (!priv->regs) {
 		dev_err(dev, "%s failed mmio\n", __func__);
diff --git a/include/linux/amba/clcd-regs.h b/include/linux/amba/clcd-regs.h
index 69c0e2143003..516a6fda83c5 100644
--- a/include/linux/amba/clcd-regs.h
+++ b/include/linux/amba/clcd-regs.h
@@ -39,12 +39,17 @@ 
 #define CLCD_PALL 		0x00000200
 #define CLCD_PALETTE		0x00000200
 
+#define TIM2_PCD_LO_MASK	GENMASK(4, 0)
+#define TIM2_PCD_LO_BITS	5
 #define TIM2_CLKSEL		(1 << 5)
 #define TIM2_IVS		(1 << 11)
 #define TIM2_IHS		(1 << 12)
 #define TIM2_IPC		(1 << 13)
 #define TIM2_IOE		(1 << 14)
 #define TIM2_BCD		(1 << 26)
+#define TIM2_PCD_HI_MASK	GENMASK(31, 27)
+#define TIM2_PCD_HI_BITS	5
+#define TIM2_PCD_HI_SHIFT	27
 
 #define CNTL_LCDEN		(1 << 0)
 #define CNTL_LCDBPP1		(0 << 1)