diff mbox series

[v5,3/7] pmdomain: rockchip: forward rockchip_do_pmu_set_power_domain errors

Message ID 20241211143044.9550-4-sebastian.reichel@collabora.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Fix RK3588 GPU power domain | expand

Commit Message

Sebastian Reichel Dec. 11, 2024, 2:26 p.m. UTC
Currently rockchip_do_pmu_set_power_domain prints a warning if there
have been errors turning on the power domain, but it does not return
any errors and rockchip_pd_power() tries to continue setting up the
QOS registers. This usually results in accessing unpowered registers,
which triggers an SError and a full system hang.

This improves the error handling by forwarding the error to avoid
kernel panics.

Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
 1 file changed, 22 insertions(+), 12 deletions(-)

Comments

Peter Geis Dec. 11, 2024, 7:53 p.m. UTC | #1
On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
<sebastian.reichel@collabora.com> wrote:
>
> Currently rockchip_do_pmu_set_power_domain prints a warning if there
> have been errors turning on the power domain, but it does not return
> any errors and rockchip_pd_power() tries to continue setting up the
> QOS registers. This usually results in accessing unpowered registers,
> which triggers an SError and a full system hang.
>
> This improves the error handling by forwarding the error to avoid
> kernel panics.

Good Afternoon,

I think we should merge your patch here with my patch for returning
errors from rockchip_pmu_set_idle_request [1].

>
> Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> Tested-by: Heiko Stuebner <heiko@sntech.de>
> Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> ---
>  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
>  1 file changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> index a161ee13c633..8f440f2883db 100644
> --- a/drivers/pmdomain/rockchip/pm-domains.c
> +++ b/drivers/pmdomain/rockchip/pm-domains.c
> @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
>         return ret;
>  }
>
> -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> -                                            bool on)
> +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> +                                           bool on)
>  {
>         struct rockchip_pmu *pmu = pd->pmu;
>         struct generic_pm_domain *genpd = &pd->genpd;
>         u32 pd_pwr_offset = pd->info->pwr_offset;
>         bool is_on, is_mem_on = false;
> +       int ret;
>
>         if (pd->info->pwr_mask == 0)
> -               return;
> +               return 0;
>
>         if (on && pd->info->mem_status_mask)
>                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
>
>         wmb();
>
> -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> -               return;
> +       if (is_mem_on) {
> +               ret = rockchip_pmu_domain_mem_reset(pd);
> +               if (ret)
> +                       return ret;
> +       }
>
> -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> -                                     is_on == on, 0, 10000)) {
> -               dev_err(pmu->dev,
> -                       "failed to set domain '%s', val=%d\n",
> -                       genpd->name, is_on);
> -               return;
> +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> +                                       is_on == on, 0, 10000);
> +       if (ret) {
> +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> +                       genpd->name, on ? "on" : "off", is_on);
> +               return ret;
>         }
> +
> +       return 0;
>  }
>
>  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
>                         rockchip_pmu_set_idle_request(pd, true);
>                 }
>
> -               rockchip_do_pmu_set_power_domain(pd, power_on);
> +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> +               if (ret < 0) {
> +                       clk_bulk_disable(pd->num_clks, pd->clks);
> +                       return ret;

Looking at it, we shouldn't return directly from here because the
mutex never gets unlocked.
Instead of repeating clk_bulk_disable and return ret for each failure,
we can initialize ret = 0, have a goto: out pointing to
clk_bulk_disable, and change return 0 to return ret at the end.

What do you think?

Very Respectfully,
Peter Geis

[1] https://lore.kernel.org/linux-rockchip/20241210013010.81257-2-pgwipeout@gmail.com/

> +               }
>
>                 if (power_on) {
>                         /* if powering up, leave idle mode */
> --
> 2.45.2
>
>
> _______________________________________________
> Linux-rockchip mailing list
> Linux-rockchip@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-rockchip
Sebastian Reichel Dec. 11, 2024, 8:45 p.m. UTC | #2
Hello Peter,

On Wed, Dec 11, 2024 at 02:53:34PM -0500, Peter Geis wrote:
> On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
> <sebastian.reichel@collabora.com> wrote:
> >
> > Currently rockchip_do_pmu_set_power_domain prints a warning if there
> > have been errors turning on the power domain, but it does not return
> > any errors and rockchip_pd_power() tries to continue setting up the
> > QOS registers. This usually results in accessing unpowered registers,
> > which triggers an SError and a full system hang.
> >
> > This improves the error handling by forwarding the error to avoid
> > kernel panics.
> 
> I think we should merge your patch here with my patch for returning
> errors from rockchip_pmu_set_idle_request [1].

I will have a look.

> > Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> > Tested-by: Heiko Stuebner <heiko@sntech.de>
> > Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> > Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> > ---
> >  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
> >  1 file changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> > index a161ee13c633..8f440f2883db 100644
> > --- a/drivers/pmdomain/rockchip/pm-domains.c
> > +++ b/drivers/pmdomain/rockchip/pm-domains.c
> > @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
> >         return ret;
> >  }
> >
> > -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > -                                            bool on)
> > +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > +                                           bool on)
> >  {
> >         struct rockchip_pmu *pmu = pd->pmu;
> >         struct generic_pm_domain *genpd = &pd->genpd;
> >         u32 pd_pwr_offset = pd->info->pwr_offset;
> >         bool is_on, is_mem_on = false;
> > +       int ret;
> >
> >         if (pd->info->pwr_mask == 0)
> > -               return;
> > +               return 0;
> >
> >         if (on && pd->info->mem_status_mask)
> >                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> > @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> >
> >         wmb();
> >
> > -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> > -               return;
> > +       if (is_mem_on) {
> > +               ret = rockchip_pmu_domain_mem_reset(pd);
> > +               if (ret)
> > +                       return ret;
> > +       }
> >
> > -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > -                                     is_on == on, 0, 10000)) {
> > -               dev_err(pmu->dev,
> > -                       "failed to set domain '%s', val=%d\n",
> > -                       genpd->name, is_on);
> > -               return;
> > +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > +                                       is_on == on, 0, 10000);
> > +       if (ret) {
> > +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> > +                       genpd->name, on ? "on" : "off", is_on);
> > +               return ret;
> >         }
> > +
> > +       return 0;
> >  }
> >
> >  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> >                         rockchip_pmu_set_idle_request(pd, true);
> >                 }
> >
> > -               rockchip_do_pmu_set_power_domain(pd, power_on);
> > +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> > +               if (ret < 0) {
> > +                       clk_bulk_disable(pd->num_clks, pd->clks);
> > +                       return ret;
> 
> Looking at it, we shouldn't return directly from here because the
> mutex never gets unlocked.

Yes, we should do that after patch 2/7 from this series :)

> Instead of repeating clk_bulk_disable and return ret for each failure,
> we can initialize ret = 0, have a goto: out pointing to
> clk_bulk_disable, and change return 0 to return ret at the end.

Right now there is only a single clk_bulk_disable() in an error
case, so I did not use the typical error goto chain. I suppose
it makes a lot more sense with proper error handling for the calls
to rockchip_pmu_set_idle_request().

Greetings,

-- Sebastian

> 
> What do you think?
> 
> Very Respectfully,
> Peter Geis
> 
> [1] https://lore.kernel.org/linux-rockchip/20241210013010.81257-2-pgwipeout@gmail.com/
> 
> > +               }
> >
> >                 if (power_on) {
> >                         /* if powering up, leave idle mode */
> > --
> > 2.45.2
> >
> >
> > _______________________________________________
> > Linux-rockchip mailing list
> > Linux-rockchip@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-rockchip
Peter Geis Dec. 11, 2024, 11:11 p.m. UTC | #3
On Wed, Dec 11, 2024 at 3:46 PM Sebastian Reichel
<sebastian.reichel@collabora.com> wrote:
>
> Hello Peter,
>
> On Wed, Dec 11, 2024 at 02:53:34PM -0500, Peter Geis wrote:
> > On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
> > <sebastian.reichel@collabora.com> wrote:
> > >
> > > Currently rockchip_do_pmu_set_power_domain prints a warning if there
> > > have been errors turning on the power domain, but it does not return
> > > any errors and rockchip_pd_power() tries to continue setting up the
> > > QOS registers. This usually results in accessing unpowered registers,
> > > which triggers an SError and a full system hang.
> > >
> > > This improves the error handling by forwarding the error to avoid
> > > kernel panics.
> >
> > I think we should merge your patch here with my patch for returning
> > errors from rockchip_pmu_set_idle_request [1].
>
> I will have a look.
>
> > > Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> > > Tested-by: Heiko Stuebner <heiko@sntech.de>
> > > Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> > > Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> > > ---
> > >  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
> > >  1 file changed, 22 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> > > index a161ee13c633..8f440f2883db 100644
> > > --- a/drivers/pmdomain/rockchip/pm-domains.c
> > > +++ b/drivers/pmdomain/rockchip/pm-domains.c
> > > @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
> > >         return ret;
> > >  }
> > >
> > > -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > -                                            bool on)
> > > +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > +                                           bool on)
> > >  {
> > >         struct rockchip_pmu *pmu = pd->pmu;
> > >         struct generic_pm_domain *genpd = &pd->genpd;
> > >         u32 pd_pwr_offset = pd->info->pwr_offset;
> > >         bool is_on, is_mem_on = false;
> > > +       int ret;
> > >
> > >         if (pd->info->pwr_mask == 0)
> > > -               return;
> > > +               return 0;
> > >
> > >         if (on && pd->info->mem_status_mask)
> > >                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> > > @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > >
> > >         wmb();
> > >
> > > -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> > > -               return;
> > > +       if (is_mem_on) {
> > > +               ret = rockchip_pmu_domain_mem_reset(pd);
> > > +               if (ret)
> > > +                       return ret;
> > > +       }
> > >
> > > -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > -                                     is_on == on, 0, 10000)) {
> > > -               dev_err(pmu->dev,
> > > -                       "failed to set domain '%s', val=%d\n",
> > > -                       genpd->name, is_on);
> > > -               return;
> > > +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > +                                       is_on == on, 0, 10000);
> > > +       if (ret) {
> > > +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> > > +                       genpd->name, on ? "on" : "off", is_on);
> > > +               return ret;
> > >         }
> > > +
> > > +       return 0;
> > >  }
> > >
> > >  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > >                         rockchip_pmu_set_idle_request(pd, true);
> > >                 }
> > >
> > > -               rockchip_do_pmu_set_power_domain(pd, power_on);
> > > +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> > > +               if (ret < 0) {
> > > +                       clk_bulk_disable(pd->num_clks, pd->clks);
> > > +                       return ret;
> >
> > Looking at it, we shouldn't return directly from here because the
> > mutex never gets unlocked.
>
> Yes, we should do that after patch 2/7 from this series :)

That's excellent!

>
> > Instead of repeating clk_bulk_disable and return ret for each failure,
> > we can initialize ret = 0, have a goto: out pointing to
> > clk_bulk_disable, and change return 0 to return ret at the end.
>
> Right now there is only a single clk_bulk_disable() in an error
> case, so I did not use the typical error goto chain. I suppose
> it makes a lot more sense with proper error handling for the calls
> to rockchip_pmu_set_idle_request().

If you'd like, I can base my v2 on this patch series with the changes
I'm suggesting?

>
> Greetings,
>
> -- Sebastian
>
> >
> > What do you think?
> >
> > Very Respectfully,
> > Peter Geis
> >
> > [1] https://lore.kernel.org/linux-rockchip/20241210013010.81257-2-pgwipeout@gmail.com/
> >
> > > +               }
> > >
> > >                 if (power_on) {
> > >                         /* if powering up, leave idle mode */
> > > --
> > > 2.45.2
> > >
> > >
> > > _______________________________________________
> > > Linux-rockchip mailing list
> > > Linux-rockchip@lists.infradead.org
> > > http://lists.infradead.org/mailman/listinfo/linux-rockchip
Ulf Hansson Dec. 12, 2024, 11:26 a.m. UTC | #4
On Thu, 12 Dec 2024 at 00:11, Peter Geis <pgwipeout@gmail.com> wrote:
>
> On Wed, Dec 11, 2024 at 3:46 PM Sebastian Reichel
> <sebastian.reichel@collabora.com> wrote:
> >
> > Hello Peter,
> >
> > On Wed, Dec 11, 2024 at 02:53:34PM -0500, Peter Geis wrote:
> > > On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
> > > <sebastian.reichel@collabora.com> wrote:
> > > >
> > > > Currently rockchip_do_pmu_set_power_domain prints a warning if there
> > > > have been errors turning on the power domain, but it does not return
> > > > any errors and rockchip_pd_power() tries to continue setting up the
> > > > QOS registers. This usually results in accessing unpowered registers,
> > > > which triggers an SError and a full system hang.
> > > >
> > > > This improves the error handling by forwarding the error to avoid
> > > > kernel panics.
> > >
> > > I think we should merge your patch here with my patch for returning
> > > errors from rockchip_pmu_set_idle_request [1].
> >
> > I will have a look.
> >
> > > > Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> > > > Tested-by: Heiko Stuebner <heiko@sntech.de>
> > > > Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> > > > Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> > > > ---
> > > >  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
> > > >  1 file changed, 22 insertions(+), 12 deletions(-)
> > > >
> > > > diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> > > > index a161ee13c633..8f440f2883db 100644
> > > > --- a/drivers/pmdomain/rockchip/pm-domains.c
> > > > +++ b/drivers/pmdomain/rockchip/pm-domains.c
> > > > @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
> > > >         return ret;
> > > >  }
> > > >
> > > > -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > -                                            bool on)
> > > > +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > +                                           bool on)
> > > >  {
> > > >         struct rockchip_pmu *pmu = pd->pmu;
> > > >         struct generic_pm_domain *genpd = &pd->genpd;
> > > >         u32 pd_pwr_offset = pd->info->pwr_offset;
> > > >         bool is_on, is_mem_on = false;
> > > > +       int ret;
> > > >
> > > >         if (pd->info->pwr_mask == 0)
> > > > -               return;
> > > > +               return 0;
> > > >
> > > >         if (on && pd->info->mem_status_mask)
> > > >                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> > > > @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > >
> > > >         wmb();
> > > >
> > > > -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> > > > -               return;
> > > > +       if (is_mem_on) {
> > > > +               ret = rockchip_pmu_domain_mem_reset(pd);
> > > > +               if (ret)
> > > > +                       return ret;
> > > > +       }
> > > >
> > > > -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > -                                     is_on == on, 0, 10000)) {
> > > > -               dev_err(pmu->dev,
> > > > -                       "failed to set domain '%s', val=%d\n",
> > > > -                       genpd->name, is_on);
> > > > -               return;
> > > > +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > +                                       is_on == on, 0, 10000);
> > > > +       if (ret) {
> > > > +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> > > > +                       genpd->name, on ? "on" : "off", is_on);
> > > > +               return ret;
> > > >         }
> > > > +
> > > > +       return 0;
> > > >  }
> > > >
> > > >  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > > @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > >                         rockchip_pmu_set_idle_request(pd, true);
> > > >                 }
> > > >
> > > > -               rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > +               if (ret < 0) {
> > > > +                       clk_bulk_disable(pd->num_clks, pd->clks);
> > > > +                       return ret;
> > >
> > > Looking at it, we shouldn't return directly from here because the
> > > mutex never gets unlocked.
> >
> > Yes, we should do that after patch 2/7 from this series :)
>
> That's excellent!
>
> >
> > > Instead of repeating clk_bulk_disable and return ret for each failure,
> > > we can initialize ret = 0, have a goto: out pointing to
> > > clk_bulk_disable, and change return 0 to return ret at the end.
> >
> > Right now there is only a single clk_bulk_disable() in an error
> > case, so I did not use the typical error goto chain. I suppose
> > it makes a lot more sense with proper error handling for the calls
> > to rockchip_pmu_set_idle_request().
>
> If you'd like, I can base my v2 on this patch series with the changes
> I'm suggesting?

I leave you guys to decide the best way forward, but please keep in
mind that fixes/stable patches are easier managed if they are as
simple as possible and without relying on cleanup patches. Better fix
the problem first, then clean up the code.

Kind regards
Uffe
Sebastian Reichel Dec. 12, 2024, 7:13 p.m. UTC | #5
Hi,

On Thu, Dec 12, 2024 at 12:26:42PM +0100, Ulf Hansson wrote:
> On Thu, 12 Dec 2024 at 00:11, Peter Geis <pgwipeout@gmail.com> wrote:
> > On Wed, Dec 11, 2024 at 3:46 PM Sebastian Reichel
> > <sebastian.reichel@collabora.com> wrote:
> > > On Wed, Dec 11, 2024 at 02:53:34PM -0500, Peter Geis wrote:
> > > > On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
> > > > <sebastian.reichel@collabora.com> wrote:
> > > > >
> > > > > Currently rockchip_do_pmu_set_power_domain prints a warning if there
> > > > > have been errors turning on the power domain, but it does not return
> > > > > any errors and rockchip_pd_power() tries to continue setting up the
> > > > > QOS registers. This usually results in accessing unpowered registers,
> > > > > which triggers an SError and a full system hang.
> > > > >
> > > > > This improves the error handling by forwarding the error to avoid
> > > > > kernel panics.
> > > >
> > > > I think we should merge your patch here with my patch for returning
> > > > errors from rockchip_pmu_set_idle_request [1].
> > >
> > > I will have a look.
> > >
> > > > > Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> > > > > Tested-by: Heiko Stuebner <heiko@sntech.de>
> > > > > Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> > > > > Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> > > > > ---
> > > > >  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
> > > > >  1 file changed, 22 insertions(+), 12 deletions(-)
> > > > >
> > > > > diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> > > > > index a161ee13c633..8f440f2883db 100644
> > > > > --- a/drivers/pmdomain/rockchip/pm-domains.c
> > > > > +++ b/drivers/pmdomain/rockchip/pm-domains.c
> > > > > @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
> > > > >         return ret;
> > > > >  }
> > > > >
> > > > > -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > > -                                            bool on)
> > > > > +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > > +                                           bool on)
> > > > >  {
> > > > >         struct rockchip_pmu *pmu = pd->pmu;
> > > > >         struct generic_pm_domain *genpd = &pd->genpd;
> > > > >         u32 pd_pwr_offset = pd->info->pwr_offset;
> > > > >         bool is_on, is_mem_on = false;
> > > > > +       int ret;
> > > > >
> > > > >         if (pd->info->pwr_mask == 0)
> > > > > -               return;
> > > > > +               return 0;
> > > > >
> > > > >         if (on && pd->info->mem_status_mask)
> > > > >                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> > > > > @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > >
> > > > >         wmb();
> > > > >
> > > > > -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> > > > > -               return;
> > > > > +       if (is_mem_on) {
> > > > > +               ret = rockchip_pmu_domain_mem_reset(pd);
> > > > > +               if (ret)
> > > > > +                       return ret;
> > > > > +       }
> > > > >
> > > > > -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > > -                                     is_on == on, 0, 10000)) {
> > > > > -               dev_err(pmu->dev,
> > > > > -                       "failed to set domain '%s', val=%d\n",
> > > > > -                       genpd->name, is_on);
> > > > > -               return;
> > > > > +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > > +                                       is_on == on, 0, 10000);
> > > > > +       if (ret) {
> > > > > +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> > > > > +                       genpd->name, on ? "on" : "off", is_on);
> > > > > +               return ret;
> > > > >         }
> > > > > +
> > > > > +       return 0;
> > > > >  }
> > > > >
> > > > >  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > > > @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > > >                         rockchip_pmu_set_idle_request(pd, true);
> > > > >                 }
> > > > >
> > > > > -               rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > > +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > > +               if (ret < 0) {
> > > > > +                       clk_bulk_disable(pd->num_clks, pd->clks);
> > > > > +                       return ret;
> > > >
> > > > Looking at it, we shouldn't return directly from here because the
> > > > mutex never gets unlocked.
> > >
> > > Yes, we should do that after patch 2/7 from this series :)
> >
> > That's excellent!
> >
> > >
> > > > Instead of repeating clk_bulk_disable and return ret for each failure,
> > > > we can initialize ret = 0, have a goto: out pointing to
> > > > clk_bulk_disable, and change return 0 to return ret at the end.
> > >
> > > Right now there is only a single clk_bulk_disable() in an error
> > > case, so I did not use the typical error goto chain. I suppose
> > > it makes a lot more sense with proper error handling for the calls
> > > to rockchip_pmu_set_idle_request().
> >
> > If you'd like, I can base my v2 on this patch series with the changes
> > I'm suggesting?
> 
> I leave you guys to decide the best way forward, but please keep in
> mind that fixes/stable patches are easier managed if they are as
> simple as possible and without relying on cleanup patches. Better fix
> the problem first, then clean up the code.

I had this ordered the other way around initially and as Heiko
pointed out that makes things more complicated overall:

https://lore.kernel.org/linux-rockchip/4864529.A9s0UXYOmP@diego/

Greetings,

-- Sebastian
Ulf Hansson Dec. 19, 2024, 1:54 p.m. UTC | #6
On Thu, 12 Dec 2024 at 20:14, Sebastian Reichel
<sebastian.reichel@collabora.com> wrote:
>
> Hi,
>
> On Thu, Dec 12, 2024 at 12:26:42PM +0100, Ulf Hansson wrote:
> > On Thu, 12 Dec 2024 at 00:11, Peter Geis <pgwipeout@gmail.com> wrote:
> > > On Wed, Dec 11, 2024 at 3:46 PM Sebastian Reichel
> > > <sebastian.reichel@collabora.com> wrote:
> > > > On Wed, Dec 11, 2024 at 02:53:34PM -0500, Peter Geis wrote:
> > > > > On Wed, Dec 11, 2024 at 9:32 AM Sebastian Reichel
> > > > > <sebastian.reichel@collabora.com> wrote:
> > > > > >
> > > > > > Currently rockchip_do_pmu_set_power_domain prints a warning if there
> > > > > > have been errors turning on the power domain, but it does not return
> > > > > > any errors and rockchip_pd_power() tries to continue setting up the
> > > > > > QOS registers. This usually results in accessing unpowered registers,
> > > > > > which triggers an SError and a full system hang.
> > > > > >
> > > > > > This improves the error handling by forwarding the error to avoid
> > > > > > kernel panics.
> > > > >
> > > > > I think we should merge your patch here with my patch for returning
> > > > > errors from rockchip_pmu_set_idle_request [1].
> > > >
> > > > I will have a look.
> > > >
> > > > > > Reviewed-by: Heiko Stuebner <heiko@sntech.de>
> > > > > > Tested-by: Heiko Stuebner <heiko@sntech.de>
> > > > > > Tested-by: Adrian Larumbe <adrian.larumbe@collabora.com> # On Rock 5B
> > > > > > Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
> > > > > > ---
> > > > > >  drivers/pmdomain/rockchip/pm-domains.c | 34 +++++++++++++++++---------
> > > > > >  1 file changed, 22 insertions(+), 12 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
> > > > > > index a161ee13c633..8f440f2883db 100644
> > > > > > --- a/drivers/pmdomain/rockchip/pm-domains.c
> > > > > > +++ b/drivers/pmdomain/rockchip/pm-domains.c
> > > > > > @@ -533,16 +533,17 @@ static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
> > > > > >         return ret;
> > > > > >  }
> > > > > >
> > > > > > -static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > > > -                                            bool on)
> > > > > > +static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > > > +                                           bool on)
> > > > > >  {
> > > > > >         struct rockchip_pmu *pmu = pd->pmu;
> > > > > >         struct generic_pm_domain *genpd = &pd->genpd;
> > > > > >         u32 pd_pwr_offset = pd->info->pwr_offset;
> > > > > >         bool is_on, is_mem_on = false;
> > > > > > +       int ret;
> > > > > >
> > > > > >         if (pd->info->pwr_mask == 0)
> > > > > > -               return;
> > > > > > +               return 0;
> > > > > >
> > > > > >         if (on && pd->info->mem_status_mask)
> > > > > >                 is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
> > > > > > @@ -557,16 +558,21 @@ static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
> > > > > >
> > > > > >         wmb();
> > > > > >
> > > > > > -       if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
> > > > > > -               return;
> > > > > > +       if (is_mem_on) {
> > > > > > +               ret = rockchip_pmu_domain_mem_reset(pd);
> > > > > > +               if (ret)
> > > > > > +                       return ret;
> > > > > > +       }
> > > > > >
> > > > > > -       if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > > > -                                     is_on == on, 0, 10000)) {
> > > > > > -               dev_err(pmu->dev,
> > > > > > -                       "failed to set domain '%s', val=%d\n",
> > > > > > -                       genpd->name, is_on);
> > > > > > -               return;
> > > > > > +       ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
> > > > > > +                                       is_on == on, 0, 10000);
> > > > > > +       if (ret) {
> > > > > > +               dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
> > > > > > +                       genpd->name, on ? "on" : "off", is_on);
> > > > > > +               return ret;
> > > > > >         }
> > > > > > +
> > > > > > +       return 0;
> > > > > >  }
> > > > > >
> > > > > >  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > > > > @@ -592,7 +598,11 @@ static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
> > > > > >                         rockchip_pmu_set_idle_request(pd, true);
> > > > > >                 }
> > > > > >
> > > > > > -               rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > > > +               ret = rockchip_do_pmu_set_power_domain(pd, power_on);
> > > > > > +               if (ret < 0) {
> > > > > > +                       clk_bulk_disable(pd->num_clks, pd->clks);
> > > > > > +                       return ret;
> > > > >
> > > > > Looking at it, we shouldn't return directly from here because the
> > > > > mutex never gets unlocked.
> > > >
> > > > Yes, we should do that after patch 2/7 from this series :)
> > >
> > > That's excellent!
> > >
> > > >
> > > > > Instead of repeating clk_bulk_disable and return ret for each failure,
> > > > > we can initialize ret = 0, have a goto: out pointing to
> > > > > clk_bulk_disable, and change return 0 to return ret at the end.
> > > >
> > > > Right now there is only a single clk_bulk_disable() in an error
> > > > case, so I did not use the typical error goto chain. I suppose
> > > > it makes a lot more sense with proper error handling for the calls
> > > > to rockchip_pmu_set_idle_request().
> > >
> > > If you'd like, I can base my v2 on this patch series with the changes
> > > I'm suggesting?
> >
> > I leave you guys to decide the best way forward, but please keep in
> > mind that fixes/stable patches are easier managed if they are as
> > simple as possible and without relying on cleanup patches. Better fix
> > the problem first, then clean up the code.
>
> I had this ordered the other way around initially and as Heiko
> pointed out that makes things more complicated overall:
>
> https://lore.kernel.org/linux-rockchip/4864529.A9s0UXYOmP@diego/

Right, I have no strong opinions, but leave the decision to you.

Still, we need confirmation on the regulator-patch (patch1) from Mark,
before I can apply any of this.

Kind regards
Uffe
diff mbox series

Patch

diff --git a/drivers/pmdomain/rockchip/pm-domains.c b/drivers/pmdomain/rockchip/pm-domains.c
index a161ee13c633..8f440f2883db 100644
--- a/drivers/pmdomain/rockchip/pm-domains.c
+++ b/drivers/pmdomain/rockchip/pm-domains.c
@@ -533,16 +533,17 @@  static int rockchip_pmu_domain_mem_reset(struct rockchip_pm_domain *pd)
 	return ret;
 }
 
-static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
-					     bool on)
+static int rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
+					    bool on)
 {
 	struct rockchip_pmu *pmu = pd->pmu;
 	struct generic_pm_domain *genpd = &pd->genpd;
 	u32 pd_pwr_offset = pd->info->pwr_offset;
 	bool is_on, is_mem_on = false;
+	int ret;
 
 	if (pd->info->pwr_mask == 0)
-		return;
+		return 0;
 
 	if (on && pd->info->mem_status_mask)
 		is_mem_on = rockchip_pmu_domain_is_mem_on(pd);
@@ -557,16 +558,21 @@  static void rockchip_do_pmu_set_power_domain(struct rockchip_pm_domain *pd,
 
 	wmb();
 
-	if (is_mem_on && rockchip_pmu_domain_mem_reset(pd))
-		return;
+	if (is_mem_on) {
+		ret = rockchip_pmu_domain_mem_reset(pd);
+		if (ret)
+			return ret;
+	}
 
-	if (readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
-				      is_on == on, 0, 10000)) {
-		dev_err(pmu->dev,
-			"failed to set domain '%s', val=%d\n",
-			genpd->name, is_on);
-		return;
+	ret = readx_poll_timeout_atomic(rockchip_pmu_domain_is_on, pd, is_on,
+					is_on == on, 0, 10000);
+	if (ret) {
+		dev_err(pmu->dev, "failed to set domain '%s' %s, val=%d\n",
+			genpd->name, on ? "on" : "off", is_on);
+		return ret;
 	}
+
+	return 0;
 }
 
 static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
@@ -592,7 +598,11 @@  static int rockchip_pd_power(struct rockchip_pm_domain *pd, bool power_on)
 			rockchip_pmu_set_idle_request(pd, true);
 		}
 
-		rockchip_do_pmu_set_power_domain(pd, power_on);
+		ret = rockchip_do_pmu_set_power_domain(pd, power_on);
+		if (ret < 0) {
+			clk_bulk_disable(pd->num_clks, pd->clks);
+			return ret;
+		}
 
 		if (power_on) {
 			/* if powering up, leave idle mode */