diff mbox series

[v2] drm/msm/dpu: Correct dpu destroy and disable order

Message ID 1541162957-12796-1-git-send-email-jshekhar@codeaurora.org (mailing list archive)
State Mainlined
Headers show
Series [v2] drm/msm/dpu: Correct dpu destroy and disable order | expand

Commit Message

Jayant Shekhar Nov. 2, 2018, 12:49 p.m. UTC
In case of msm drm bind failure, dpu_mdss_destroy is triggered.
In this function, resources are freed and pm runtime disable is
called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
driver tries to access a memory which is already freed. This
results in kernel panic. Fix this by ensuring proper sequence
of dpu destroy and disable calls.

Changes in v2:
   - Removed double spacings [Jeykumar]

Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

Comments

Sean Paul Nov. 2, 2018, 1:15 p.m. UTC | #1
On Fri, Nov 02, 2018 at 06:19:17PM +0530, Jayant Shekhar wrote:
> In case of msm drm bind failure, dpu_mdss_destroy is triggered.
> In this function, resources are freed and pm runtime disable is
> called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
> driver tries to access a memory which is already freed. This
> results in kernel panic. Fix this by ensuring proper sequence
> of dpu destroy and disable calls.
> 
> Changes in v2:
>    - Removed double spacings [Jeykumar]
> 
> Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>

Reviewed-by: Sean Paul <sean@poorly.run>

> ---
>  drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> index fd9c893..902bb4c 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> @@ -156,18 +156,15 @@ static void dpu_mdss_destroy(struct drm_device *dev)
>  	struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
>  	struct dss_module_power *mp = &dpu_mdss->mp;
>  
> +	pm_runtime_disable(dev->dev);
>  	_dpu_mdss_irq_domain_fini(dpu_mdss);
> -
>  	free_irq(platform_get_irq(pdev, 0), dpu_mdss);
> -
>  	msm_dss_put_clk(mp->clk_config, mp->num_clk);
>  	devm_kfree(&pdev->dev, mp->clk_config);
>  
>  	if (dpu_mdss->mmio)
>  		devm_iounmap(&pdev->dev, dpu_mdss->mmio);
>  	dpu_mdss->mmio = NULL;
> -
> -	pm_runtime_disable(dev->dev);
>  	priv->mdss = NULL;
>  }
>  
> -- 
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>
Jordan Crouse Nov. 2, 2018, 2:32 p.m. UTC | #2
On Fri, Nov 02, 2018 at 06:19:17PM +0530, Jayant Shekhar wrote:
> In case of msm drm bind failure, dpu_mdss_destroy is triggered.
> In this function, resources are freed and pm runtime disable is
> called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
> driver tries to access a memory which is already freed. This
> results in kernel panic. Fix this by ensuring proper sequence
> of dpu destroy and disable calls.

Since you are in this specific area, I noticed we also had some memory leaks
on bind "failure" and I just sent out a patch for that.

Jordan
Doug Anderson Nov. 5, 2018, 7:31 p.m. UTC | #3
Hi,

On Fri, Nov 2, 2018 at 5:49 AM Jayant Shekhar <jshekhar@codeaurora.org> wrote:
>
> In case of msm drm bind failure, dpu_mdss_destroy is triggered.
> In this function, resources are freed and pm runtime disable is
> called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
> driver tries to access a memory which is already freed. This
> results in kernel panic. Fix this by ensuring proper sequence
> of dpu destroy and disable calls.
>
> Changes in v2:
>    - Removed double spacings [Jeykumar]
>
> Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 5 +----
>  1 file changed, 1 insertion(+), 4 deletions(-)

I can confirm that this fixes a crash I was running into.
Specifically it looked like this:

---

msm_dsi ae94000.dsi: ae94000.dsi supply gdsc not found, using dummy regulator
msm_dsi ae94000.dsi: Linked as a consumer to regulator.0
msm_dsi ae94000.dsi: Dropping the link to regulator.0
msm_dsi ae94000.dsi: Linked as a consumer to regulator.35
msm_dsi_manager_register: failed to register mipi dsi host for DSI 0
msm_dsi ae94000.dsi: Dropping the link to regulator.35
msm ae00000.mdss: failed to bind ae94000.dsi (ops dsi_ops): -517
Unable to handle kernel paging request at virtual address 006b6b6b6b6b6b6b
...
Workqueue: pm pm_runtime_work
...
Call trace:
 clk_disable+0x24/0x4c
 msm_dss_enable_clk+0xec/0x164
 dpu_mdss_disable+0x2c/0x84
 msm_runtime_suspend+0x50/0x5c
 pm_generic_runtime_suspend+0x3c/0x48
 genpd_runtime_suspend+0xf4/0x1cc
 __rpm_callback+0x128/0x1a4
 rpm_callback+0x70/0x90
 rpm_suspend+0x2f4/0x564
 rpm_idle+0x3c0/0x3d8
 pm_runtime_work+0x6c/0xa0
 process_one_work+0x38c/0x634
 worker_thread+0x218/0x360
 kthread+0x130/0x138
 ret_from_fork+0x10/0x18

---

Thus:

Tested-by: Douglas Anderson <dianders@chromium.org>


>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> index fd9c893..902bb4c 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> @@ -156,18 +156,15 @@ static void dpu_mdss_destroy(struct drm_device *dev)
>         struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
>         struct dss_module_power *mp = &dpu_mdss->mp;
>
> +       pm_runtime_disable(dev->dev);
>         _dpu_mdss_irq_domain_fini(dpu_mdss);
> -
>         free_irq(platform_get_irq(pdev, 0), dpu_mdss);
> -
>         msm_dss_put_clk(mp->clk_config, mp->num_clk);
>         devm_kfree(&pdev->dev, mp->clk_config);
>
>         if (dpu_mdss->mmio)
>                 devm_iounmap(&pdev->dev, dpu_mdss->mmio);
>         dpu_mdss->mmio = NULL;
> -
> -       pm_runtime_disable(dev->dev);
>         priv->mdss = NULL;
>  }
>
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project
>
Rajendra Nayak Nov. 9, 2018, 6:07 a.m. UTC | #4
On 11/2/2018 6:19 PM, Jayant Shekhar wrote:
> In case of msm drm bind failure, dpu_mdss_destroy is triggered.
> In this function, resources are freed and pm runtime disable is
> called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
> driver tries to access a memory which is already freed. This
> results in kernel panic. Fix this by ensuring proper sequence
> of dpu destroy and disable calls.
> 
> Changes in v2:
>     - Removed double spacings [Jeykumar]
> 
> Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>

I was testing this patch out and in my setup I see that with this
change, during a dsi probe defer case, a dpu_mdss_enable() is called
followed by a dpu_mdss_destroy() but dpu_mdss_disable() is never called.

This results in a mismatch in clock usecount causing the mdp clocks to
stay enabled even with display turned off.

localhost ~ # cat /sys/kernel/debug/clk/clk_summary | grep mdss
  disp_cc_mdss_rscc_ahb_clk            0        0        0           0          0     0  50000
  disp_cc_mdss_axi_clk                 0        0        0           0          0     0  50000
  disp_cc_mdss_ahb_clk                 0        0        0           0          0     0  50000
        disp_cc_mdss_mdp_clk_src       1        1        0    19200000          0     0  50000
           disp_cc_mdss_mdp_lut_clk       0        0        0    19200000          0     0  50000
           disp_cc_mdss_mdp_clk        1        1        0    19200000          0     0  50000
        disp_cc_mdss_vsync_clk_src       0        0        0    19200000          0     0  50000

Also to note, on an older 4.14 kernel, I do not see any issue in the order in which
these functions are called in the dsi probe defer case, i.e its an enable followed by
a disable and then a destroy (and that's without this patch)

> ---
>   drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 5 +----
>   1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> index fd9c893..902bb4c 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> @@ -156,18 +156,15 @@ static void dpu_mdss_destroy(struct drm_device *dev)
>   	struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
>   	struct dss_module_power *mp = &dpu_mdss->mp;
>   
> +	pm_runtime_disable(dev->dev);
>   	_dpu_mdss_irq_domain_fini(dpu_mdss);
> -
>   	free_irq(platform_get_irq(pdev, 0), dpu_mdss);
> -
>   	msm_dss_put_clk(mp->clk_config, mp->num_clk);
>   	devm_kfree(&pdev->dev, mp->clk_config);
>   
>   	if (dpu_mdss->mmio)
>   		devm_iounmap(&pdev->dev, dpu_mdss->mmio);
>   	dpu_mdss->mmio = NULL;
> -
> -	pm_runtime_disable(dev->dev);
>   	priv->mdss = NULL;
>   }
>   
>
Sean Paul Nov. 13, 2018, 3:40 p.m. UTC | #5
On Fri, Nov 09, 2018 at 11:37:17AM +0530, Rajendra Nayak wrote:
> 
> On 11/2/2018 6:19 PM, Jayant Shekhar wrote:
> > In case of msm drm bind failure, dpu_mdss_destroy is triggered.
> > In this function, resources are freed and pm runtime disable is
> > called, which triggers dpu_mdss_disable. Now in dpu_mdss_disable,
> > driver tries to access a memory which is already freed. This
> > results in kernel panic. Fix this by ensuring proper sequence
> > of dpu destroy and disable calls.
> > 
> > Changes in v2:
> >     - Removed double spacings [Jeykumar]
> > 
> > Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>
> 
> I was testing this patch out and in my setup I see that with this
> change, during a dsi probe defer case, a dpu_mdss_enable() is called
> followed by a dpu_mdss_destroy() but dpu_mdss_disable() is never called.
> 
> This results in a mismatch in clock usecount causing the mdp clocks to
> stay enabled even with display turned off.

This suggests an unmatched pm_runtime_get/put. I just audited the code and
nothing stuck out to me in the probe defer case, things seem to be well matched.
Could you instrument the pm_runtime_* calls in msm/dpu and see if there's a _get
without a _put?

Sean

> 
> localhost ~ # cat /sys/kernel/debug/clk/clk_summary | grep mdss
>  disp_cc_mdss_rscc_ahb_clk            0        0        0           0          0     0  50000
>  disp_cc_mdss_axi_clk                 0        0        0           0          0     0  50000
>  disp_cc_mdss_ahb_clk                 0        0        0           0          0     0  50000
>        disp_cc_mdss_mdp_clk_src       1        1        0    19200000          0     0  50000
>           disp_cc_mdss_mdp_lut_clk       0        0        0    19200000          0     0  50000
>           disp_cc_mdss_mdp_clk        1        1        0    19200000          0     0  50000
>        disp_cc_mdss_vsync_clk_src       0        0        0    19200000          0     0  50000
> 
> Also to note, on an older 4.14 kernel, I do not see any issue in the order in which
> these functions are called in the dsi probe defer case, i.e its an enable followed by
> a disable and then a destroy (and that's without this patch)
> 
> > ---
> >   drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c | 5 +----
> >   1 file changed, 1 insertion(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > index fd9c893..902bb4c 100644
> > --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
> > @@ -156,18 +156,15 @@ static void dpu_mdss_destroy(struct drm_device *dev)
> >   	struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
> >   	struct dss_module_power *mp = &dpu_mdss->mp;
> > +	pm_runtime_disable(dev->dev);
> >   	_dpu_mdss_irq_domain_fini(dpu_mdss);
> > -
> >   	free_irq(platform_get_irq(pdev, 0), dpu_mdss);
> > -
> >   	msm_dss_put_clk(mp->clk_config, mp->num_clk);
> >   	devm_kfree(&pdev->dev, mp->clk_config);
> >   	if (dpu_mdss->mmio)
> >   		devm_iounmap(&pdev->dev, dpu_mdss->mmio);
> >   	dpu_mdss->mmio = NULL;
> > -
> > -	pm_runtime_disable(dev->dev);
> >   	priv->mdss = NULL;
> >   }
> > 
> _______________________________________________
> Freedreno mailing list
> Freedreno@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/freedreno
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
index fd9c893..902bb4c 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c
@@ -156,18 +156,15 @@  static void dpu_mdss_destroy(struct drm_device *dev)
 	struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss);
 	struct dss_module_power *mp = &dpu_mdss->mp;
 
+	pm_runtime_disable(dev->dev);
 	_dpu_mdss_irq_domain_fini(dpu_mdss);
-
 	free_irq(platform_get_irq(pdev, 0), dpu_mdss);
-
 	msm_dss_put_clk(mp->clk_config, mp->num_clk);
 	devm_kfree(&pdev->dev, mp->clk_config);
 
 	if (dpu_mdss->mmio)
 		devm_iounmap(&pdev->dev, dpu_mdss->mmio);
 	dpu_mdss->mmio = NULL;
-
-	pm_runtime_disable(dev->dev);
 	priv->mdss = NULL;
 }