diff mbox series

[v2,1/3] drm/rockchip: dsi: Hold pm-runtime across bind/unbind

Message ID 20210927105928.v2.1.Ic2904d37f30013a7f3d8476203ad3733c186827e@changeid (mailing list archive)
State New, archived
Headers show
Series Fix Rockchip MIPI DSI display init timeouts | expand

Commit Message

Brian Norris Sept. 27, 2021, 5:59 p.m. UTC
In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
didn't move the runtime PM management. Therefore, depending on initial
boot state, runtime-PM workqueue delays, and other timing factors, we
may disable our power domain in between the hardware configuration
(bind()) and when we enable the display. This can cause us to lose
hardware state and fail to configure our display. For example:

  dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
  panel-innolux-p079zca ff960000.mipi.0: failed to write command 0

or:

  dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
  panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110

We should match the runtime PM to the lifetime of the bind()/unbind()
cycle.

Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
built either as modules or built-in.

Side note: it seems one is more likely to see this problem when the
panel driver is built into the kernel. I've also seen this problem
bisect down to commits that simply changed Kconfig dependencies, because
they changed the order in which driver init functions were compiled into
the kernel, and therefore the ordering and timing of built-in device
probe.

Fixes: 43c2de1002d2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d16c1.camel@gmail.com/
Reported-by: <aleksandr.o.makarov@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
---

Changes in v2:
- Clean up pm-runtime state in error cases.
- Correct git hash for Fixes.

 .../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c   | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

Comments

Tom Hebb Sept. 27, 2021, 7:07 p.m. UTC | #1
Reviewed-by: Thomas Hebb <tommyhebb@gmail.com>

Thank you for catching this, and sorry that my original fix broke things.
There had actually been a report of this breakage from my patch, but I
missed that email until it had already been merged and then didn't have
time to follow up on it. Totally my bad.

On Mon, Sep 27, 2021 at 11:00 AM Brian Norris <briannorris@chromium.org>
wrote:

> In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
> didn't move the runtime PM management. Therefore, depending on initial
> boot state, runtime-PM workqueue delays, and other timing factors, we
> may disable our power domain in between the hardware configuration
> (bind()) and when we enable the display. This can cause us to lose
> hardware state and fail to configure our display. For example:
>
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
>
> or:
>
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
>
> We should match the runtime PM to the lifetime of the bind()/unbind()
> cycle.
>
> Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
> built either as modules or built-in.
>
> Side notes: it seems one is more likely to see this problem when the
> panel driver is built into the kernel. I've also seen this problem
> bisect down to commits that simply changed Kconfig dependencies, because
> it changed the order in which driver init functions were compiled into
> the kernel, and therefore the ordering and timing of built-in device
> probe.
>
> Fixes: 43c2de1002d2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
> Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d16c1.camel@gmail.com/
> Reported-by: <aleksandr.o.makarov@gmail.com>
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> ---
>
> Changes in v2:
> - Clean up pm-runtime state in error cases.
> - Correct git hash for Fixes.
>
>  .../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c   | 37 ++++++++++---------
>  1 file changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> index a2262bee5aa4..45676b23c019 100644
> --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> @@ -773,10 +773,6 @@ static void dw_mipi_dsi_encoder_enable(struct
> drm_encoder *encoder)
>         if (mux < 0)
>                 return;
>
> -       pm_runtime_get_sync(dsi->dev);
> -       if (dsi->slave)
> -               pm_runtime_get_sync(dsi->slave->dev);
> -
>         /*
>          * For the RK3399, the clk of grf must be enabled before writing
> grf
>          * register. And for RK3288 or other soc, this grf_clk must be
> NULL,
> @@ -795,20 +791,10 @@ static void dw_mipi_dsi_encoder_enable(struct
> drm_encoder *encoder)
>         clk_disable_unprepare(dsi->grf_clk);
>  }
>
> -static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
> -{
> -       struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
> -
> -       if (dsi->slave)
> -               pm_runtime_put(dsi->slave->dev);
> -       pm_runtime_put(dsi->dev);
> -}
> -
>  static const struct drm_encoder_helper_funcs
>  dw_mipi_dsi_encoder_helper_funcs = {
>         .atomic_check = dw_mipi_dsi_encoder_atomic_check,
>         .enable = dw_mipi_dsi_encoder_enable,
> -       .disable = dw_mipi_dsi_encoder_disable,
>  };
>
>  static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip
> *dsi,
> @@ -938,10 +924,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device
> *dev,
>                 put_device(second);
>         }
>
> +       pm_runtime_get_sync(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_get_sync(dsi->slave->dev);
> +
>         ret = clk_prepare_enable(dsi->pllref_clk);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n",
> ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         /*
> @@ -953,7 +943,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device
> *dev,
>         ret = clk_prepare_enable(dsi->grf_clk);
>         if (ret) {
>                 DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n",
> ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         dw_mipi_dsi_rockchip_config(dsi);
> @@ -965,16 +955,23 @@ static int dw_mipi_dsi_rockchip_bind(struct device
> *dev,
>         ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         return 0;
> +
> +out_pm_runtime:
> +       pm_runtime_put(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_put(dsi->slave->dev);
> +
> +       return ret;
>  }
>
>  static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
> @@ -989,6 +986,10 @@ static void dw_mipi_dsi_rockchip_unbind(struct device
> *dev,
>         dw_mipi_dsi_unbind(dsi->dmd);
>
>         clk_disable_unprepare(dsi->pllref_clk);
> +
> +       pm_runtime_put(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_put(dsi->slave->dev);
>  }
>
>  static const struct component_ops dw_mipi_dsi_rockchip_ops = {
> --
> 2.33.0.685.g46640cef36-goog
>
>
Tom Hebb Sept. 27, 2021, 7:17 p.m. UTC | #2
Reviewed-by: Thomas Hebb <tommyhebb@gmail.com>

Thank you for catching this, and sorry that my original fix broke things.
There had actually been a report of this breakage from my patch, but I
missed that email until it had already been merged and then didn't have
time to follow up on it. Totally my bad.

[Resending because my last reply was HTML.]

On Mon, Sep 27, 2021 at 11:00 AM Brian Norris <briannorris@chromium.org> wrote:
>
> In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
> didn't move the runtime PM management. Therefore, depending on initial
> boot state, runtime-PM workqueue delays, and other timing factors, we
> may disable our power domain in between the hardware configuration
> (bind()) and when we enable the display. This can cause us to lose
> hardware state and fail to configure our display. For example:
>
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
>
> or:
>
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
>
> We should match the runtime PM to the lifetime of the bind()/unbind()
> cycle.
>
> Tested on Acer Chrometab 10 (RK3399 Gru-Scarlet), with panel drivers
> built either as modules or built-in.
>
> Side notes: it seems one is more likely to see this problem when the
> panel driver is built into the kernel. I've also seen this problem
> bisect down to commits that simply changed Kconfig dependencies, because
> it changed the order in which driver init functions were compiled into
> the kernel, and therefore the ordering and timing of built-in device
> probe.
>
> Fixes: 43c2de1002d2 ("drm/rockchip: dsi: move all lane config except LCDC mux to bind()")
> Link: https://lore.kernel.org/linux-rockchip/9aedfb528600ecf871885f7293ca4207c84d16c1.camel@gmail.com/
> Reported-by: <aleksandr.o.makarov@gmail.com>
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Brian Norris <briannorris@chromium.org>
> Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
> ---
>
> Changes in v2:
> - Clean up pm-runtime state in error cases.
> - Correct git hash for Fixes.
>
>  .../gpu/drm/rockchip/dw-mipi-dsi-rockchip.c   | 37 ++++++++++---------
>  1 file changed, 19 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> index a2262bee5aa4..45676b23c019 100644
> --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
> @@ -773,10 +773,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
>         if (mux < 0)
>                 return;
>
> -       pm_runtime_get_sync(dsi->dev);
> -       if (dsi->slave)
> -               pm_runtime_get_sync(dsi->slave->dev);
> -
>         /*
>          * For the RK3399, the clk of grf must be enabled before writing grf
>          * register. And for RK3288 or other soc, this grf_clk must be NULL,
> @@ -795,20 +791,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
>         clk_disable_unprepare(dsi->grf_clk);
>  }
>
> -static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
> -{
> -       struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
> -
> -       if (dsi->slave)
> -               pm_runtime_put(dsi->slave->dev);
> -       pm_runtime_put(dsi->dev);
> -}
> -
>  static const struct drm_encoder_helper_funcs
>  dw_mipi_dsi_encoder_helper_funcs = {
>         .atomic_check = dw_mipi_dsi_encoder_atomic_check,
>         .enable = dw_mipi_dsi_encoder_enable,
> -       .disable = dw_mipi_dsi_encoder_disable,
>  };
>
>  static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
> @@ -938,10 +924,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
>                 put_device(second);
>         }
>
> +       pm_runtime_get_sync(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_get_sync(dsi->slave->dev);
> +
>         ret = clk_prepare_enable(dsi->pllref_clk);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         /*
> @@ -953,7 +943,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
>         ret = clk_prepare_enable(dsi->grf_clk);
>         if (ret) {
>                 DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         dw_mipi_dsi_rockchip_config(dsi);
> @@ -965,16 +955,23 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev,
>         ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
>         if (ret) {
>                 DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
> -               return ret;
> +               goto out_pm_runtime;
>         }
>
>         return 0;
> +
> +out_pm_runtime:
> +       pm_runtime_put(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_put(dsi->slave->dev);
> +
> +       return ret;
>  }
>
>  static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
> @@ -989,6 +986,10 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
>         dw_mipi_dsi_unbind(dsi->dmd);
>
>         clk_disable_unprepare(dsi->pllref_clk);
> +
> +       pm_runtime_put(dsi->dev);
> +       if (dsi->slave)
> +               pm_runtime_put(dsi->slave->dev);
>  }
>
>  static const struct component_ops dw_mipi_dsi_rockchip_ops = {
> --
> 2.33.0.685.g46640cef36-goog
>
Brian Norris Sept. 27, 2021, 7:57 p.m. UTC | #3
On Mon, Sep 27, 2021 at 12:18 PM Tom Hebb <tommyhebb@gmail.com> wrote:
> Reviewed-by: Thomas Hebb <tommyhebb@gmail.com>

Thanks!

> Thank you for catching this, and sorry that my original fix broke things.
> There had actually been a report of this breakage from my patch, but I
> missed that email until it had already been merged and then didn't have
> time to follow up on it. Totally my bad.

No worries. It was a 1 step forward, 1 step backward kind of a thing
anyway -- things were broken in many cases before your patch too (with
very similar-looking symptoms) -- so the net result is still good,
having both issues fixed.

I'm not sure how that ideally should have been handled [1], but it's
totally fair to not have time to follow up on everything. At the
worst, we could have reverted things; but again, I'm pretty sure
things were broken just as well without your fix, just with a
different root cause.

Regards,
Brian

[1] Don't accept (or, revert?) your bugfix until what may or may not
have been a regression is fixed? I'm not sure.
Brian Norris Sept. 27, 2021, 10:53 p.m. UTC | #4
On Mon, Sep 27, 2021 at 10:59:42AM -0700, Brian Norris wrote:
> In commit 43c2de1002d2, we moved most HW configuration to bind(), but we
> didn't move the runtime PM management. Therefore, depending on initial
> boot state, runtime-PM workqueue delays, and other timing factors, we
> may disable our power domain in between the hardware configuration
> (bind()) and when we enable the display. This can cause us to lose
> hardware state and fail to configure our display. For example:
> 
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-innolux-p079zca ff960000.mipi.0: failed to write command 0
> 
> or:
> 
>   dw-mipi-dsi-rockchip ff968000.mipi: failed to write command FIFO
>   panel-kingdisplay-kd097d04 ff960000.mipi.0: failed write init cmds: -110
> 
> We should match the runtime PM to the lifetime of the bind()/unbind()
> cycle.

Hmm, sorry to reply to my own patch so quickly, but after a bit more
testing I'm finding we still have yet another problem here -- that
suspend/resume does not work. For suspend/resume,
drm_mode_config_helper_{suspend,resume}() are expecting to only do
teardown/setup via disable()/enable() -- there is no re-bind() (which
makes sense). But the DSI hardware state may be lost, so the resume-time
enable() may find the panel initialization timing out yet again.

Possible solutions:

(1) I can add PM suspend()/resume() operations just to call
    dw_mipi_dsi_rockchip_config().

(2) Switch back to using mode_set() for HW configuration, as the
    downstream/BSP driver does (and as did the initial versions that
    Rockchip, and later Heiko, were working on), since mode_set() is
    always called at the right time, before both panel and encoder
    enable(). That also happens to be where some other DSI drivers [1]
    do similar init.

Have we been avoiding (2) just because that doesn't really match the
intended purpose of the callback? I can't find any cleaner callback for
this at the moment, and I'd rather not try to introduce entirely new drm
helper callbacks just for this particularly-unfriendly sequence.

I have a patch written for option (1), and may send a v3 soon to include
that as well (because that's also a regression from the same commit).

Brian

[1] e.g., drivers/gpu/drm/bridge/nwl-dsi.c
diff mbox series

Patch

diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index a2262bee5aa4..45676b23c019 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -773,10 +773,6 @@  static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
 	if (mux < 0)
 		return;
 
-	pm_runtime_get_sync(dsi->dev);
-	if (dsi->slave)
-		pm_runtime_get_sync(dsi->slave->dev);
-
 	/*
 	 * For the RK3399, the clk of grf must be enabled before writing grf
 	 * register. And for RK3288 or other soc, this grf_clk must be NULL,
@@ -795,20 +791,10 @@  static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder)
 	clk_disable_unprepare(dsi->grf_clk);
 }
 
-static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder)
-{
-	struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder);
-
-	if (dsi->slave)
-		pm_runtime_put(dsi->slave->dev);
-	pm_runtime_put(dsi->dev);
-}
-
 static const struct drm_encoder_helper_funcs
 dw_mipi_dsi_encoder_helper_funcs = {
 	.atomic_check = dw_mipi_dsi_encoder_atomic_check,
 	.enable = dw_mipi_dsi_encoder_enable,
-	.disable = dw_mipi_dsi_encoder_disable,
 };
 
 static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi,
@@ -938,10 +924,14 @@  static int dw_mipi_dsi_rockchip_bind(struct device *dev,
 		put_device(second);
 	}
 
+	pm_runtime_get_sync(dsi->dev);
+	if (dsi->slave)
+		pm_runtime_get_sync(dsi->slave->dev);
+
 	ret = clk_prepare_enable(dsi->pllref_clk);
 	if (ret) {
 		DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret);
-		return ret;
+		goto out_pm_runtime;
 	}
 
 	/*
@@ -953,7 +943,7 @@  static int dw_mipi_dsi_rockchip_bind(struct device *dev,
 	ret = clk_prepare_enable(dsi->grf_clk);
 	if (ret) {
 		DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret);
-		return ret;
+		goto out_pm_runtime;
 	}
 
 	dw_mipi_dsi_rockchip_config(dsi);
@@ -965,16 +955,23 @@  static int dw_mipi_dsi_rockchip_bind(struct device *dev,
 	ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev);
 	if (ret) {
 		DRM_DEV_ERROR(dev, "Failed to create drm encoder\n");
-		return ret;
+		goto out_pm_runtime;
 	}
 
 	ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder);
 	if (ret) {
 		DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret);
-		return ret;
+		goto out_pm_runtime;
 	}
 
 	return 0;
+
+out_pm_runtime:
+	pm_runtime_put(dsi->dev);
+	if (dsi->slave)
+		pm_runtime_put(dsi->slave->dev);
+
+	return ret;
 }
 
 static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
@@ -989,6 +986,10 @@  static void dw_mipi_dsi_rockchip_unbind(struct device *dev,
 	dw_mipi_dsi_unbind(dsi->dmd);
 
 	clk_disable_unprepare(dsi->pllref_clk);
+
+	pm_runtime_put(dsi->dev);
+	if (dsi->slave)
+		pm_runtime_put(dsi->slave->dev);
 }
 
 static const struct component_ops dw_mipi_dsi_rockchip_ops = {