diff mbox series

drm/lima: Convert to clk_bulk API

Message ID 20210716182051.218575-1-marex@denx.de (mailing list archive)
State New, archived
Headers show
Series drm/lima: Convert to clk_bulk API | expand

Commit Message

Marek Vasut July 16, 2021, 6:20 p.m. UTC
Instead of requesting two separate clock and then handling them
separately in various places of the driver, use clk_bulk_*() API.
This permits handling devices with more than "bus"/"core" clock,
like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
clock.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Qiang Yu <yuq825@gmail.com>
Cc: lima@lists.freedesktop.org
---
 drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
 drivers/gpu/drm/lima/lima_devfreq.h |  1 +
 drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
 drivers/gpu/drm/lima/lima_device.h  |  4 +--
 4 files changed, 32 insertions(+), 32 deletions(-)

Comments

Qiang Yu July 17, 2021, 12:34 p.m. UTC | #1
On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
>
> Instead of requesting two separate clock and then handling them
> separately in various places of the driver, use clk_bulk_*() API.
> This permits handling devices with more than "bus"/"core" clock,
> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
> clock.

I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
which has mali GPU node with an upstream kernel, where is it?

So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
clk? Do they need to be controlled separately or we can just control the
"gpu" clk? Because the devfreq code just controls a single module clk.

>
> Signed-off-by: Marek Vasut <marex@denx.de>
> Cc: Qiang Yu <yuq825@gmail.com>
> Cc: lima@lists.freedesktop.org
> ---
>  drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
>  drivers/gpu/drm/lima/lima_devfreq.h |  1 +
>  drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
>  drivers/gpu/drm/lima/lima_device.h  |  4 +--
>  4 files changed, 32 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
> index 8989e215dfc9..533b36932f79 100644
> --- a/drivers/gpu/drm/lima/lima_devfreq.c
> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
>         struct lima_devfreq *devfreq = &ldev->devfreq;
>         unsigned long irqflags;
>
> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
>
>         spin_lock_irqsave(&devfreq->lock, irqflags);
>
> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
>         struct lima_devfreq *ldevfreq = &ldev->devfreq;
>         struct dev_pm_opp *opp;
>         unsigned long cur_freq;
> -       int ret;
> +       int i, ret;
>
>         if (!device_property_present(dev, "operating-points-v2"))
>                 /* Optional, continue without devfreq */
>                 return 0;
>
> +       /* Find first clock which are not "bus" clock */
> +       for (i = 0; i < ldev->nr_clks; i++) {
> +               if (!strcmp(ldev->clks[i].id, "bus"))
> +                       continue;
> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
> +               break;
> +       }

I'd prefer an explicit name for the required clk name. If some DTS has different
name other than "core" for the module clk (ie. "gpu"), it should be changed to
"core".

> +
> +       if (!ldevfreq->clk_gpu)
> +               return -ENODEV;
> +
>         spin_lock_init(&ldevfreq->lock);
>
>         ret = devm_pm_opp_set_clkname(dev, "core");
> @@ -135,7 +146,7 @@ int lima_devfreq_init(struct lima_device *ldev)
>
>         lima_devfreq_reset(ldevfreq);
>
> -       cur_freq = clk_get_rate(ldev->clk_gpu);
> +       cur_freq = clk_get_rate(ldevfreq->clk_gpu);
>
>         opp = devfreq_recommended_opp(dev, &cur_freq, 0);
>         if (IS_ERR(opp))
> diff --git a/drivers/gpu/drm/lima/lima_devfreq.h b/drivers/gpu/drm/lima/lima_devfreq.h
> index b8e50feaeab6..ffef5c91795d 100644
> --- a/drivers/gpu/drm/lima/lima_devfreq.h
> +++ b/drivers/gpu/drm/lima/lima_devfreq.h
> @@ -17,6 +17,7 @@ struct lima_devfreq {
>         struct devfreq *devfreq;
>         struct thermal_cooling_device *cooling;
>         struct devfreq_simple_ondemand_data gov_data;
> +       struct clk *clk_gpu;
>
>         ktime_t busy_time;
>         ktime_t idle_time;
> diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
> index 65fdca366e41..9f7bde7e9d22 100644
> --- a/drivers/gpu/drm/lima/lima_device.c
> +++ b/drivers/gpu/drm/lima/lima_device.c
> @@ -85,29 +85,23 @@ static int lima_clk_enable(struct lima_device *dev)
>  {
>         int err;
>
> -       err = clk_prepare_enable(dev->clk_bus);
> +       err = clk_bulk_prepare_enable(dev->nr_clks, dev->clks);
>         if (err)
>                 return err;
>
> -       err = clk_prepare_enable(dev->clk_gpu);
> -       if (err)
> -               goto error_out0;
> -
>         if (dev->reset) {
>                 err = reset_control_deassert(dev->reset);
>                 if (err) {
>                         dev_err(dev->dev,
>                                 "reset controller deassert failed %d\n", err);
> -                       goto error_out1;
> +                       goto error;
>                 }
>         }
>
>         return 0;
>
> -error_out1:
> -       clk_disable_unprepare(dev->clk_gpu);
> -error_out0:
> -       clk_disable_unprepare(dev->clk_bus);
> +error:
> +       clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
>         return err;
>  }
>
> @@ -115,31 +109,23 @@ static void lima_clk_disable(struct lima_device *dev)
>  {
>         if (dev->reset)
>                 reset_control_assert(dev->reset);
> -       clk_disable_unprepare(dev->clk_gpu);
> -       clk_disable_unprepare(dev->clk_bus);
> +       clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
>  }
>
>  static int lima_clk_init(struct lima_device *dev)
>  {
>         int err;
>
> -       dev->clk_bus = devm_clk_get(dev->dev, "bus");
> -       if (IS_ERR(dev->clk_bus)) {
> -               err = PTR_ERR(dev->clk_bus);
> +       err = devm_clk_bulk_get_all(dev->dev, &dev->clks);
> +       if (err < 1) {
> +               if (err == 0)   /* No clock at all is an error too */
> +                       err = -ENODEV;
>                 if (err != -EPROBE_DEFER)
> -                       dev_err(dev->dev, "get bus clk failed %d\n", err);
> -               dev->clk_bus = NULL;
> +                       dev_err(dev->dev, "get clk failed %d\n", err);
>                 return err;
>         }
>
> -       dev->clk_gpu = devm_clk_get(dev->dev, "core");
> -       if (IS_ERR(dev->clk_gpu)) {
> -               err = PTR_ERR(dev->clk_gpu);
> -               if (err != -EPROBE_DEFER)
> -                       dev_err(dev->dev, "get core clk failed %d\n", err);
> -               dev->clk_gpu = NULL;
> -               return err;
> -       }
> +       dev->nr_clks = err;
>
>         dev->reset = devm_reset_control_array_get_optional_shared(dev->dev);
>         if (IS_ERR(dev->reset)) {
> @@ -412,8 +398,10 @@ int lima_device_init(struct lima_device *ldev)
>         INIT_LIST_HEAD(&ldev->error_task_list);
>         mutex_init(&ldev->error_task_list_lock);
>
> -       dev_info(ldev->dev, "bus rate = %lu\n", clk_get_rate(ldev->clk_bus));
> -       dev_info(ldev->dev, "mod rate = %lu", clk_get_rate(ldev->clk_gpu));
> +       for (i = 0; i < ldev->nr_clks; i++) {
> +               dev_info(ldev->dev, "clk %s = %lu Hz\n", ldev->clks[i].id,
> +                        clk_get_rate(ldev->clks[i].clk));
> +       }
>
>         return 0;
>
> diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
> index 41b9d7b4bcc7..de11570c9903 100644
> --- a/drivers/gpu/drm/lima/lima_device.h
> +++ b/drivers/gpu/drm/lima/lima_device.h
> @@ -85,8 +85,8 @@ struct lima_device {
>         int num_pp;
>
>         void __iomem *iomem;
> -       struct clk *clk_bus;
> -       struct clk *clk_gpu;
> +       struct clk_bulk_data *clks;
> +       int nr_clks;
>         struct reset_control *reset;
>         struct regulator *regulator;
>
> --
> 2.30.2
>
Marek Vasut July 17, 2021, 1:08 p.m. UTC | #2
On 7/17/21 2:34 PM, Qiang Yu wrote:
> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
>>
>> Instead of requesting two separate clock and then handling them
>> separately in various places of the driver, use clk_bulk_*() API.
>> This permits handling devices with more than "bus"/"core" clock,
>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
>> clock.
> 
> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
> which has mali GPU node with an upstream kernel, where is it?

Posted here:
https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210716182544.219490-1-marex@denx.de/

> So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
> clk? Do they need to be controlled separately or we can just control the
> "gpu" clk? Because the devfreq code just controls a single module clk.

Per the docs, they are separate enable bits and the zynqmp clock 
controller exports them as separate clock, see bits 24..26 here:

https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref_ctrl.html

>> Signed-off-by: Marek Vasut <marex@denx.de>
>> Cc: Qiang Yu <yuq825@gmail.com>
>> Cc: lima@lists.freedesktop.org
>> ---
>>   drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
>>   drivers/gpu/drm/lima/lima_devfreq.h |  1 +
>>   drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
>>   drivers/gpu/drm/lima/lima_device.h  |  4 +--
>>   4 files changed, 32 insertions(+), 32 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
>> index 8989e215dfc9..533b36932f79 100644
>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
>>          struct lima_devfreq *devfreq = &ldev->devfreq;
>>          unsigned long irqflags;
>>
>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
>> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
>>
>>          spin_lock_irqsave(&devfreq->lock, irqflags);
>>
>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
>>          struct lima_devfreq *ldevfreq = &ldev->devfreq;
>>          struct dev_pm_opp *opp;
>>          unsigned long cur_freq;
>> -       int ret;
>> +       int i, ret;
>>
>>          if (!device_property_present(dev, "operating-points-v2"))
>>                  /* Optional, continue without devfreq */
>>                  return 0;
>>
>> +       /* Find first clock which are not "bus" clock */
>> +       for (i = 0; i < ldev->nr_clks; i++) {
>> +               if (!strcmp(ldev->clks[i].id, "bus"))
>> +                       continue;
>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
>> +               break;
>> +       }
> 
> I'd prefer an explicit name for the required clk name. If some DTS has different
> name other than "core" for the module clk (ie. "gpu"), it should be changed to
> "core".

The problem here is, the zynqmp has no core clock, it has "gpu and both 
pixel pipes" super-clock-gate which controls everything, and then 
per-pixel-pipe sub-clock-gates.
Qiang Yu July 17, 2021, 2:21 p.m. UTC | #3
On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
>
> On 7/17/21 2:34 PM, Qiang Yu wrote:
> > On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
> >>
> >> Instead of requesting two separate clock and then handling them
> >> separately in various places of the driver, use clk_bulk_*() API.
> >> This permits handling devices with more than "bus"/"core" clock,
> >> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
> >> clock.
> >
> > I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
> > which has mali GPU node with an upstream kernel, where is it?
>
> Posted here:
> https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210716182544.219490-1-marex@denx.de/
>
> > So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
> > clk? Do they need to be controlled separately or we can just control the
> > "gpu" clk? Because the devfreq code just controls a single module clk.
>
> Per the docs, they are separate enable bits and the zynqmp clock
> controller exports them as separate clock, see bits 24..26 here:
>
> https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref_ctrl.html
>
> >> Signed-off-by: Marek Vasut <marex@denx.de>
> >> Cc: Qiang Yu <yuq825@gmail.com>
> >> Cc: lima@lists.freedesktop.org
> >> ---
> >>   drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
> >>   drivers/gpu/drm/lima/lima_devfreq.h |  1 +
> >>   drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
> >>   drivers/gpu/drm/lima/lima_device.h  |  4 +--
> >>   4 files changed, 32 insertions(+), 32 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
> >> index 8989e215dfc9..533b36932f79 100644
> >> --- a/drivers/gpu/drm/lima/lima_devfreq.c
> >> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
> >> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
> >>          struct lima_devfreq *devfreq = &ldev->devfreq;
> >>          unsigned long irqflags;
> >>
> >> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
> >> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
> >>
> >>          spin_lock_irqsave(&devfreq->lock, irqflags);
> >>
> >> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
> >>          struct lima_devfreq *ldevfreq = &ldev->devfreq;
> >>          struct dev_pm_opp *opp;
> >>          unsigned long cur_freq;
> >> -       int ret;
> >> +       int i, ret;
> >>
> >>          if (!device_property_present(dev, "operating-points-v2"))
> >>                  /* Optional, continue without devfreq */
> >>                  return 0;
> >>
> >> +       /* Find first clock which are not "bus" clock */
> >> +       for (i = 0; i < ldev->nr_clks; i++) {
> >> +               if (!strcmp(ldev->clks[i].id, "bus"))
> >> +                       continue;
> >> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
> >> +               break;
> >> +       }
> >
> > I'd prefer an explicit name for the required clk name. If some DTS has different
> > name other than "core" for the module clk (ie. "gpu"), it should be changed to
> > "core".
>
> The problem here is, the zynqmp has no core clock, it has "gpu and both
> pixel pipes" super-clock-gate which controls everything, and then
> per-pixel-pipe sub-clock-gates.

So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about frequency?
Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
through the same
rate? If so, "gpu" works just like "core".
Marek Vasut July 17, 2021, 2:52 p.m. UTC | #4
On 7/17/21 4:21 PM, Qiang Yu wrote:
> On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
>>
>> On 7/17/21 2:34 PM, Qiang Yu wrote:
>>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
>>>>
>>>> Instead of requesting two separate clock and then handling them
>>>> separately in various places of the driver, use clk_bulk_*() API.
>>>> This permits handling devices with more than "bus"/"core" clock,
>>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
>>>> clock.
>>>
>>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
>>> which has mali GPU node with an upstream kernel, where is it?
>>
>> Posted here:
>> https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210716182544.219490-1-marex@denx.de/
>>
>>> So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
>>> clk? Do they need to be controlled separately or we can just control the
>>> "gpu" clk? Because the devfreq code just controls a single module clk.
>>
>> Per the docs, they are separate enable bits and the zynqmp clock
>> controller exports them as separate clock, see bits 24..26 here:
>>
>> https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref_ctrl.html
>>
>>>> Signed-off-by: Marek Vasut <marex@denx.de>
>>>> Cc: Qiang Yu <yuq825@gmail.com>
>>>> Cc: lima@lists.freedesktop.org
>>>> ---
>>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
>>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
>>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
>>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
>>>>    4 files changed, 32 insertions(+), 32 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
>>>> index 8989e215dfc9..533b36932f79 100644
>>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
>>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
>>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
>>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
>>>>           unsigned long irqflags;
>>>>
>>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
>>>> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
>>>>
>>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
>>>>
>>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
>>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
>>>>           struct dev_pm_opp *opp;
>>>>           unsigned long cur_freq;
>>>> -       int ret;
>>>> +       int i, ret;
>>>>
>>>>           if (!device_property_present(dev, "operating-points-v2"))
>>>>                   /* Optional, continue without devfreq */
>>>>                   return 0;
>>>>
>>>> +       /* Find first clock which are not "bus" clock */
>>>> +       for (i = 0; i < ldev->nr_clks; i++) {
>>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
>>>> +                       continue;
>>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
>>>> +               break;
>>>> +       }
>>>
>>> I'd prefer an explicit name for the required clk name. If some DTS has different
>>> name other than "core" for the module clk (ie. "gpu"), it should be changed to
>>> "core".
>>
>> The problem here is, the zynqmp has no core clock, it has "gpu and both
>> pixel pipes" super-clock-gate which controls everything, and then
>> per-pixel-pipe sub-clock-gates.
> 
> So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about frequency?

I don't think it is a good idea to just gate off the root clock while 
the sub-clock are still enabled. That might lead to latch ups (+CC 
Michal, he might know more).

And who would enable the sub-clock anyway, it should be the GPU driver, no?

> Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
> through the same
> rate? If so, "gpu" works just like "core".

I don't think the zynqmp is capable of any DVFS on the GPU at all, it 
just runs at fixed frequency.
Qiang Yu July 18, 2021, 2:56 a.m. UTC | #5
On Sat, Jul 17, 2021 at 10:52 PM Marek Vasut <marex@denx.de> wrote:
>
> On 7/17/21 4:21 PM, Qiang Yu wrote:
> > On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
> >>
> >> On 7/17/21 2:34 PM, Qiang Yu wrote:
> >>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
> >>>>
> >>>> Instead of requesting two separate clock and then handling them
> >>>> separately in various places of the driver, use clk_bulk_*() API.
> >>>> This permits handling devices with more than "bus"/"core" clock,
> >>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
> >>>> clock.
> >>>
> >>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
> >>> which has mali GPU node with an upstream kernel, where is it?
> >>
> >> Posted here:
> >> https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210716182544.219490-1-marex@denx.de/
> >>
> >>> So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
> >>> clk? Do they need to be controlled separately or we can just control the
> >>> "gpu" clk? Because the devfreq code just controls a single module clk.
> >>
> >> Per the docs, they are separate enable bits and the zynqmp clock
> >> controller exports them as separate clock, see bits 24..26 here:
> >>
> >> https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref_ctrl.html
> >>
> >>>> Signed-off-by: Marek Vasut <marex@denx.de>
> >>>> Cc: Qiang Yu <yuq825@gmail.com>
> >>>> Cc: lima@lists.freedesktop.org
> >>>> ---
> >>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
> >>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
> >>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
> >>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
> >>>>    4 files changed, 32 insertions(+), 32 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
> >>>> index 8989e215dfc9..533b36932f79 100644
> >>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
> >>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
> >>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
> >>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
> >>>>           unsigned long irqflags;
> >>>>
> >>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
> >>>> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
> >>>>
> >>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
> >>>>
> >>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
> >>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
> >>>>           struct dev_pm_opp *opp;
> >>>>           unsigned long cur_freq;
> >>>> -       int ret;
> >>>> +       int i, ret;
> >>>>
> >>>>           if (!device_property_present(dev, "operating-points-v2"))
> >>>>                   /* Optional, continue without devfreq */
> >>>>                   return 0;
> >>>>
> >>>> +       /* Find first clock which are not "bus" clock */
> >>>> +       for (i = 0; i < ldev->nr_clks; i++) {
> >>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
> >>>> +                       continue;
> >>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
> >>>> +               break;
> >>>> +       }
> >>>
> >>> I'd prefer an explicit name for the required clk name. If some DTS has different
> >>> name other than "core" for the module clk (ie. "gpu"), it should be changed to
> >>> "core".
> >>
> >> The problem here is, the zynqmp has no core clock, it has "gpu and both
> >> pixel pipes" super-clock-gate which controls everything, and then
> >> per-pixel-pipe sub-clock-gates.
> >
> > So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about frequency?
>
> I don't think it is a good idea to just gate off the root clock while
> the sub-clock are still enabled. That might lead to latch ups (+CC
> Michal, he might know more).
>
> And who would enable the sub-clock anyway, it should be the GPU driver, no?
>
Right, I understand it's not proper either by HW or SW point of view to just
use root clk gate.

> > Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
> > through the same
> > rate? If so, "gpu" works just like "core".
>
> I don't think the zynqmp is capable of any DVFS on the GPU at all, it
> just runs at fixed frequency.

I see the GPU_REF_CTRL register 13:8 is a divisor, is this for all
"gpu"/"gpu_pp0"/"gpu_pp1" clk rating? If so, can we use it to dynamically
change the GPU clk freq because other SoC also use system clock
to do GPU DVFS, see sun8i-h3.dtsi. If we can't then zynqmp won't finish
lima_devfreq_init() and get here at all because it does not have
an OPP table.
Michal Simek July 19, 2021, 7:52 a.m. UTC | #6
On 7/18/21 4:56 AM, Qiang Yu wrote:
> On Sat, Jul 17, 2021 at 10:52 PM Marek Vasut <marex@denx.de> wrote:
>>
>> On 7/17/21 4:21 PM, Qiang Yu wrote:
>>> On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
>>>>
>>>> On 7/17/21 2:34 PM, Qiang Yu wrote:
>>>>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
>>>>>>
>>>>>> Instead of requesting two separate clock and then handling them
>>>>>> separately in various places of the driver, use clk_bulk_*() API.
>>>>>> This permits handling devices with more than "bus"/"core" clock,
>>>>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
>>>>>> clock.
>>>>>
>>>>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
>>>>> which has mali GPU node with an upstream kernel, where is it?
>>>>
>>>> Posted here:
>>>> https://patchwork.kernel.org/project/linux-arm-kernel/patch/20210716182544.219490-1-marex@denx.de/
>>>>
>>>>> So what's the relationship between "gpu" clk and "gpu_pp0"/"gpu_pp1"
>>>>> clk? Do they need to be controlled separately or we can just control the
>>>>> "gpu" clk? Because the devfreq code just controls a single module clk.
>>>>
>>>> Per the docs, they are separate enable bits and the zynqmp clock
>>>> controller exports them as separate clock, see bits 24..26 here:
>>>>
>>>> https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref_ctrl.html
>>>>
>>>>>> Signed-off-by: Marek Vasut <marex@denx.de>
>>>>>> Cc: Qiang Yu <yuq825@gmail.com>
>>>>>> Cc: lima@lists.freedesktop.org
>>>>>> ---
>>>>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
>>>>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
>>>>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++------------------
>>>>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
>>>>>>    4 files changed, 32 insertions(+), 32 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
>>>>>> index 8989e215dfc9..533b36932f79 100644
>>>>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
>>>>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
>>>>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct device *dev,
>>>>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
>>>>>>           unsigned long irqflags;
>>>>>>
>>>>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
>>>>>> +       status->current_frequency = clk_get_rate(devfreq->clk_gpu);
>>>>>>
>>>>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
>>>>>>
>>>>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device *ldev)
>>>>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
>>>>>>           struct dev_pm_opp *opp;
>>>>>>           unsigned long cur_freq;
>>>>>> -       int ret;
>>>>>> +       int i, ret;
>>>>>>
>>>>>>           if (!device_property_present(dev, "operating-points-v2"))
>>>>>>                   /* Optional, continue without devfreq */
>>>>>>                   return 0;
>>>>>>
>>>>>> +       /* Find first clock which are not "bus" clock */
>>>>>> +       for (i = 0; i < ldev->nr_clks; i++) {
>>>>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
>>>>>> +                       continue;
>>>>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
>>>>>> +               break;
>>>>>> +       }
>>>>>
>>>>> I'd prefer an explicit name for the required clk name. If some DTS has different
>>>>> name other than "core" for the module clk (ie. "gpu"), it should be changed to
>>>>> "core".
>>>>
>>>> The problem here is, the zynqmp has no core clock, it has "gpu and both
>>>> pixel pipes" super-clock-gate which controls everything, and then
>>>> per-pixel-pipe sub-clock-gates.
>>>
>>> So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about frequency?
>>
>> I don't think it is a good idea to just gate off the root clock while
>> the sub-clock are still enabled. That might lead to latch ups (+CC
>> Michal, he might know more).
>>
>> And who would enable the sub-clock anyway, it should be the GPU driver, no?
>>
> Right, I understand it's not proper either by HW or SW point of view to just
> use root clk gate.
> 
>>> Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
>>> through the same
>>> rate? If so, "gpu" works just like "core".
>>
>> I don't think the zynqmp is capable of any DVFS on the GPU at all, it
>> just runs at fixed frequency.
> 
> I see the GPU_REF_CTRL register 13:8 is a divisor, is this for all
> "gpu"/"gpu_pp0"/"gpu_pp1" clk rating? If so, can we use it to dynamically
> change the GPU clk freq because other SoC also use system clock
> to do GPU DVFS, see sun8i-h3.dtsi. If we can't then zynqmp won't finish
> lima_devfreq_init() and get here at all because it does not have
> an OPP table.
> 

Jianqiang: Please take a look at this from zynqmp point of view.

Thanks,
Michal
Jianqiang Chen July 19, 2021, 3:38 p.m. UTC | #7
Add Mads.

Thanks,
Jason

> -----Original Message-----
> From: Michal Simek <michal.simek@xilinx.com>
> Sent: Monday, July 19, 2021 12:53 AM
> To: Qiang Yu <yuq825@gmail.com>; Marek Vasut <marex@denx.de>;
> Jianqiang Chen <jianqian@xilinx.com>
> Cc: dri-devel <dri-devel@lists.freedesktop.org>; lima@lists.freedesktop.org;
> Michal Simek <michals@xilinx.com>; Michal Simek <monstr@monstr.eu>
> Subject: Re: [PATCH] drm/lima: Convert to clk_bulk API
> 
> 
> 
> On 7/18/21 4:56 AM, Qiang Yu wrote:
> > On Sat, Jul 17, 2021 at 10:52 PM Marek Vasut <marex@denx.de> wrote:
> >>
> >> On 7/17/21 4:21 PM, Qiang Yu wrote:
> >>> On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
> >>>>
> >>>> On 7/17/21 2:34 PM, Qiang Yu wrote:
> >>>>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
> >>>>>>
> >>>>>> Instead of requesting two separate clock and then handling them
> >>>>>> separately in various places of the driver, use clk_bulk_*() API.
> >>>>>> This permits handling devices with more than "bus"/"core" clock,
> >>>>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
> >>>>>> clock.
> >>>>>
> >>>>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
> >>>>> which has mali GPU node with an upstream kernel, where is it?
> >>>>
> >>>> Posted here:
> >>>> https://patchwork.kernel.org/project/linux-arm-kernel/patch/2021071
> >>>> 6182544.219490-1-marex@denx.de/
> >>>>
> >>>>> So what's the relationship between "gpu" clk and
> "gpu_pp0"/"gpu_pp1"
> >>>>> clk? Do they need to be controlled separately or we can just
> >>>>> control the "gpu" clk? Because the devfreq code just controls a single
> module clk.
> >>>>
> >>>> Per the docs, they are separate enable bits and the zynqmp clock
> >>>> controller exports them as separate clock, see bits 24..26 here:
> >>>>
> >>>>
> https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref
> >>>> _ctrl.html
> >>>>
> >>>>>> Signed-off-by: Marek Vasut <marex@denx.de>
> >>>>>> Cc: Qiang Yu <yuq825@gmail.com>
> >>>>>> Cc: lima@lists.freedesktop.org
> >>>>>> ---
> >>>>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
> >>>>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
> >>>>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++-------------
> -----
> >>>>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
> >>>>>>    4 files changed, 32 insertions(+), 32 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c
> >>>>>> b/drivers/gpu/drm/lima/lima_devfreq.c
> >>>>>> index 8989e215dfc9..533b36932f79 100644
> >>>>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
> >>>>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
> >>>>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct
> device *dev,
> >>>>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
> >>>>>>           unsigned long irqflags;
> >>>>>>
> >>>>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
> >>>>>> +       status->current_frequency =
> >>>>>> + clk_get_rate(devfreq->clk_gpu);
> >>>>>>
> >>>>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
> >>>>>>
> >>>>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device
> *ldev)
> >>>>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
> >>>>>>           struct dev_pm_opp *opp;
> >>>>>>           unsigned long cur_freq;
> >>>>>> -       int ret;
> >>>>>> +       int i, ret;
> >>>>>>
> >>>>>>           if (!device_property_present(dev, "operating-points-v2"))
> >>>>>>                   /* Optional, continue without devfreq */
> >>>>>>                   return 0;
> >>>>>>
> >>>>>> +       /* Find first clock which are not "bus" clock */
> >>>>>> +       for (i = 0; i < ldev->nr_clks; i++) {
> >>>>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
> >>>>>> +                       continue;
> >>>>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
> >>>>>> +               break;
> >>>>>> +       }
> >>>>>
> >>>>> I'd prefer an explicit name for the required clk name. If some DTS
> >>>>> has different name other than "core" for the module clk (ie.
> >>>>> "gpu"), it should be changed to "core".
> >>>>
> >>>> The problem here is, the zynqmp has no core clock, it has "gpu and
> >>>> both pixel pipes" super-clock-gate which controls everything, and
> >>>> then per-pixel-pipe sub-clock-gates.
> >>>
> >>> So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about
> frequency?
> >>
> >> I don't think it is a good idea to just gate off the root clock while
> >> the sub-clock are still enabled. That might lead to latch ups (+CC
> >> Michal, he might know more).
> >>
> >> And who would enable the sub-clock anyway, it should be the GPU driver,
> no?
> >>
> > Right, I understand it's not proper either by HW or SW point of view
> > to just use root clk gate.
> >
> >>> Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
> >>> through the same rate? If so, "gpu" works just like "core".
> >>
> >> I don't think the zynqmp is capable of any DVFS on the GPU at all, it
> >> just runs at fixed frequency.
> >
> > I see the GPU_REF_CTRL register 13:8 is a divisor, is this for all
> > "gpu"/"gpu_pp0"/"gpu_pp1" clk rating? If so, can we use it to
> > dynamically change the GPU clk freq because other SoC also use system
> > clock to do GPU DVFS, see sun8i-h3.dtsi. If we can't then zynqmp won't
> > finish
> > lima_devfreq_init() and get here at all because it does not have an
> > OPP table.
> >
> 
> Jianqiang: Please take a look at this from zynqmp point of view.

Hi Mads, can you comment on this?

> 
> Thanks,
> Michal
Madhurkiran Harikrishnan July 19, 2021, 4:03 p.m. UTC | #8
Hi,

I had created a patch sometimes ago to test on our platform. That is correct, we have three clocks going to gp,pp0 and pp1 (Although all are at same rate). DVFS is not supported primarily because at zynqmp clocks are shared, for example it could come from VPLL or IOPLL. 

All zynqmp specific patches can be found here: https://github.com/Xilinx/meta-xilinx/tree/rel-v2021.1/meta-xilinx-bsp/recipes-graphics/mali/kernel-module-mali

-Mads

On 7/19/21, 10:38 AM, "Jianqiang Chen" <jianqian@xilinx.com> wrote:

    Add Mads.

    Thanks,
    Jason

    > -----Original Message-----
    > From: Michal Simek <michal.simek@xilinx.com>
    > Sent: Monday, July 19, 2021 12:53 AM
    > To: Qiang Yu <yuq825@gmail.com>; Marek Vasut <marex@denx.de>;
    > Jianqiang Chen <jianqian@xilinx.com>
    > Cc: dri-devel <dri-devel@lists.freedesktop.org>; lima@lists.freedesktop.org;
    > Michal Simek <michals@xilinx.com>; Michal Simek <monstr@monstr.eu>
    > Subject: Re: [PATCH] drm/lima: Convert to clk_bulk API
    > 
    > 
    > 
    > On 7/18/21 4:56 AM, Qiang Yu wrote:
    > > On Sat, Jul 17, 2021 at 10:52 PM Marek Vasut <marex@denx.de> wrote:
    > >>
    > >> On 7/17/21 4:21 PM, Qiang Yu wrote:
    > >>> On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
    > >>>>
    > >>>> On 7/17/21 2:34 PM, Qiang Yu wrote:
    > >>>>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
    > >>>>>>
    > >>>>>> Instead of requesting two separate clock and then handling them
    > >>>>>> separately in various places of the driver, use clk_bulk_*() API.
    > >>>>>> This permits handling devices with more than "bus"/"core" clock,
    > >>>>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
    > >>>>>> clock.
    > >>>>>
    > >>>>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
    > >>>>> which has mali GPU node with an upstream kernel, where is it?
    > >>>>
    > >>>> Posted here:
    > >>>> https://patchwork.kernel.org/project/linux-arm-kernel/patch/2021071
    > >>>> 6182544.219490-1-marex@denx.de/
    > >>>>
    > >>>>> So what's the relationship between "gpu" clk and
    > "gpu_pp0"/"gpu_pp1"
    > >>>>> clk? Do they need to be controlled separately or we can just
    > >>>>> control the "gpu" clk? Because the devfreq code just controls a single
    > module clk.
    > >>>>
    > >>>> Per the docs, they are separate enable bits and the zynqmp clock
    > >>>> controller exports them as separate clock, see bits 24..26 here:
    > >>>>
    > >>>>
    > https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref
    > >>>> _ctrl.html
    > >>>>
    > >>>>>> Signed-off-by: Marek Vasut <marex@denx.de>
    > >>>>>> Cc: Qiang Yu <yuq825@gmail.com>
    > >>>>>> Cc: lima@lists.freedesktop.org
    > >>>>>> ---
    > >>>>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
    > >>>>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
    > >>>>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++-------------
    > -----
    > >>>>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
    > >>>>>>    4 files changed, 32 insertions(+), 32 deletions(-)
    > >>>>>>
    > >>>>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c
    > >>>>>> b/drivers/gpu/drm/lima/lima_devfreq.c
    > >>>>>> index 8989e215dfc9..533b36932f79 100644
    > >>>>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
    > >>>>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
    > >>>>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct
    > device *dev,
    > >>>>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
    > >>>>>>           unsigned long irqflags;
    > >>>>>>
    > >>>>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
    > >>>>>> +       status->current_frequency =
    > >>>>>> + clk_get_rate(devfreq->clk_gpu);
    > >>>>>>
    > >>>>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
    > >>>>>>
    > >>>>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device
    > *ldev)
    > >>>>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
    > >>>>>>           struct dev_pm_opp *opp;
    > >>>>>>           unsigned long cur_freq;
    > >>>>>> -       int ret;
    > >>>>>> +       int i, ret;
    > >>>>>>
    > >>>>>>           if (!device_property_present(dev, "operating-points-v2"))
    > >>>>>>                   /* Optional, continue without devfreq */
    > >>>>>>                   return 0;
    > >>>>>>
    > >>>>>> +       /* Find first clock which are not "bus" clock */
    > >>>>>> +       for (i = 0; i < ldev->nr_clks; i++) {
    > >>>>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
    > >>>>>> +                       continue;
    > >>>>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
    > >>>>>> +               break;
    > >>>>>> +       }
    > >>>>>
    > >>>>> I'd prefer an explicit name for the required clk name. If some DTS
    > >>>>> has different name other than "core" for the module clk (ie.
    > >>>>> "gpu"), it should be changed to "core".
    > >>>>
    > >>>> The problem here is, the zynqmp has no core clock, it has "gpu and
    > >>>> both pixel pipes" super-clock-gate which controls everything, and
    > >>>> then per-pixel-pipe sub-clock-gates.
    > >>>
    > >>> So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about
    > frequency?
    > >>
    > >> I don't think it is a good idea to just gate off the root clock while
    > >> the sub-clock are still enabled. That might lead to latch ups (+CC
    > >> Michal, he might know more).
    > >>
    > >> And who would enable the sub-clock anyway, it should be the GPU driver,
    > no?
    > >>
    > > Right, I understand it's not proper either by HW or SW point of view
    > > to just use root clk gate.
    > >
    > >>> Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
    > >>> through the same rate? If so, "gpu" works just like "core".
    > >>
    > >> I don't think the zynqmp is capable of any DVFS on the GPU at all, it
    > >> just runs at fixed frequency.
    > >
    > > I see the GPU_REF_CTRL register 13:8 is a divisor, is this for all
    > > "gpu"/"gpu_pp0"/"gpu_pp1" clk rating? If so, can we use it to
    > > dynamically change the GPU clk freq because other SoC also use system
    > > clock to do GPU DVFS, see sun8i-h3.dtsi. If we can't then zynqmp won't
    > > finish
    > > lima_devfreq_init() and get here at all because it does not have an
    > > OPP table.
    > >
    > 
    > Jianqiang: Please take a look at this from zynqmp point of view.

    Hi Mads, can you comment on this?

    > 
    > Thanks,
    > Michal
Qiang Yu July 20, 2021, 1:20 a.m. UTC | #9
On Tue, Jul 20, 2021 at 12:03 AM Madhurkiran Harikrishnan
<MADHURKI@xilinx.com> wrote:
>
> Hi,
>
> I had created a patch sometimes ago to test on our platform. That is correct, we have three clocks going to gp,pp0 and pp1 (Although all are at same rate). DVFS is not supported primarily because at zynqmp clocks are shared, for example it could come from VPLL or IOPLL.
>
It's not necessary to change the root PLL clock rate to support DVFS.
You can also change a divisor to a sub clock if the sub clock is not
shared by another module. For example if root PLL is 700MHz, you can
set the divisor to GPU clock to 2 to make GPU run at 350MHz. My
question is if the GPU_REF_CTRL[13:8] is just such a divisor and can
be used to support DVFS (an OPP table in DTS)?

> All zynqmp specific patches can be found here: https://github.com/Xilinx/meta-xilinx/tree/rel-v2021.1/meta-xilinx-bsp/recipes-graphics/mali/kernel-module-mali
>
> -Mads
>
> On 7/19/21, 10:38 AM, "Jianqiang Chen" <jianqian@xilinx.com> wrote:
>
>     Add Mads.
>
>     Thanks,
>     Jason
>
>     > -----Original Message-----
>     > From: Michal Simek <michal.simek@xilinx.com>
>     > Sent: Monday, July 19, 2021 12:53 AM
>     > To: Qiang Yu <yuq825@gmail.com>; Marek Vasut <marex@denx.de>;
>     > Jianqiang Chen <jianqian@xilinx.com>
>     > Cc: dri-devel <dri-devel@lists.freedesktop.org>; lima@lists.freedesktop.org;
>     > Michal Simek <michals@xilinx.com>; Michal Simek <monstr@monstr.eu>
>     > Subject: Re: [PATCH] drm/lima: Convert to clk_bulk API
>     >
>     >
>     >
>     > On 7/18/21 4:56 AM, Qiang Yu wrote:
>     > > On Sat, Jul 17, 2021 at 10:52 PM Marek Vasut <marex@denx.de> wrote:
>     > >>
>     > >> On 7/17/21 4:21 PM, Qiang Yu wrote:
>     > >>> On Sat, Jul 17, 2021 at 9:08 PM Marek Vasut <marex@denx.de> wrote:
>     > >>>>
>     > >>>> On 7/17/21 2:34 PM, Qiang Yu wrote:
>     > >>>>> On Sat, Jul 17, 2021 at 2:20 AM Marek Vasut <marex@denx.de> wrote:
>     > >>>>>>
>     > >>>>>> Instead of requesting two separate clock and then handling them
>     > >>>>>> separately in various places of the driver, use clk_bulk_*() API.
>     > >>>>>> This permits handling devices with more than "bus"/"core" clock,
>     > >>>>>> like ZynqMP, which has "gpu"/"gpu_pp0"/"gpu_pp1" all as separate
>     > >>>>>> clock.
>     > >>>>>
>     > >>>>> I can't find the ZynqMP DTS file under arch/arm64/boot/dts/xilinx
>     > >>>>> which has mali GPU node with an upstream kernel, where is it?
>     > >>>>
>     > >>>> Posted here:
>     > >>>> https://patchwork.kernel.org/project/linux-arm-kernel/patch/2021071
>     > >>>> 6182544.219490-1-marex@denx.de/
>     > >>>>
>     > >>>>> So what's the relationship between "gpu" clk and
>     > "gpu_pp0"/"gpu_pp1"
>     > >>>>> clk? Do they need to be controlled separately or we can just
>     > >>>>> control the "gpu" clk? Because the devfreq code just controls a single
>     > module clk.
>     > >>>>
>     > >>>> Per the docs, they are separate enable bits and the zynqmp clock
>     > >>>> controller exports them as separate clock, see bits 24..26 here:
>     > >>>>
>     > >>>>
>     > https://www.xilinx.com/html_docs/registers/ug1087/crf_apb___gpu_ref
>     > >>>> _ctrl.html
>     > >>>>
>     > >>>>>> Signed-off-by: Marek Vasut <marex@denx.de>
>     > >>>>>> Cc: Qiang Yu <yuq825@gmail.com>
>     > >>>>>> Cc: lima@lists.freedesktop.org
>     > >>>>>> ---
>     > >>>>>>    drivers/gpu/drm/lima/lima_devfreq.c | 17 +++++++++---
>     > >>>>>>    drivers/gpu/drm/lima/lima_devfreq.h |  1 +
>     > >>>>>>    drivers/gpu/drm/lima/lima_device.c  | 42 +++++++++++-------------
>     > -----
>     > >>>>>>    drivers/gpu/drm/lima/lima_device.h  |  4 +--
>     > >>>>>>    4 files changed, 32 insertions(+), 32 deletions(-)
>     > >>>>>>
>     > >>>>>> diff --git a/drivers/gpu/drm/lima/lima_devfreq.c
>     > >>>>>> b/drivers/gpu/drm/lima/lima_devfreq.c
>     > >>>>>> index 8989e215dfc9..533b36932f79 100644
>     > >>>>>> --- a/drivers/gpu/drm/lima/lima_devfreq.c
>     > >>>>>> +++ b/drivers/gpu/drm/lima/lima_devfreq.c
>     > >>>>>> @@ -58,7 +58,7 @@ static int lima_devfreq_get_dev_status(struct
>     > device *dev,
>     > >>>>>>           struct lima_devfreq *devfreq = &ldev->devfreq;
>     > >>>>>>           unsigned long irqflags;
>     > >>>>>>
>     > >>>>>> -       status->current_frequency = clk_get_rate(ldev->clk_gpu);
>     > >>>>>> +       status->current_frequency =
>     > >>>>>> + clk_get_rate(devfreq->clk_gpu);
>     > >>>>>>
>     > >>>>>>           spin_lock_irqsave(&devfreq->lock, irqflags);
>     > >>>>>>
>     > >>>>>> @@ -110,12 +110,23 @@ int lima_devfreq_init(struct lima_device
>     > *ldev)
>     > >>>>>>           struct lima_devfreq *ldevfreq = &ldev->devfreq;
>     > >>>>>>           struct dev_pm_opp *opp;
>     > >>>>>>           unsigned long cur_freq;
>     > >>>>>> -       int ret;
>     > >>>>>> +       int i, ret;
>     > >>>>>>
>     > >>>>>>           if (!device_property_present(dev, "operating-points-v2"))
>     > >>>>>>                   /* Optional, continue without devfreq */
>     > >>>>>>                   return 0;
>     > >>>>>>
>     > >>>>>> +       /* Find first clock which are not "bus" clock */
>     > >>>>>> +       for (i = 0; i < ldev->nr_clks; i++) {
>     > >>>>>> +               if (!strcmp(ldev->clks[i].id, "bus"))
>     > >>>>>> +                       continue;
>     > >>>>>> +               ldevfreq->clk_gpu = ldev->clks[i].clk;
>     > >>>>>> +               break;
>     > >>>>>> +       }
>     > >>>>>
>     > >>>>> I'd prefer an explicit name for the required clk name. If some DTS
>     > >>>>> has different name other than "core" for the module clk (ie.
>     > >>>>> "gpu"), it should be changed to "core".
>     > >>>>
>     > >>>> The problem here is, the zynqmp has no core clock, it has "gpu and
>     > >>>> both pixel pipes" super-clock-gate which controls everything, and
>     > >>>> then per-pixel-pipe sub-clock-gates.
>     > >>>
>     > >>> So the "gpu" clk can gate both "gpu_pp0" and "gpu_pp1" clk, how about
>     > frequency?
>     > >>
>     > >> I don't think it is a good idea to just gate off the root clock while
>     > >> the sub-clock are still enabled. That might lead to latch ups (+CC
>     > >> Michal, he might know more).
>     > >>
>     > >> And who would enable the sub-clock anyway, it should be the GPU driver,
>     > no?
>     > >>
>     > > Right, I understand it's not proper either by HW or SW point of view
>     > > to just use root clk gate.
>     > >
>     > >>> Can we set clock rate for "gpu" then "gpu_pp0" and "gpu_pp1" pass
>     > >>> through the same rate? If so, "gpu" works just like "core".
>     > >>
>     > >> I don't think the zynqmp is capable of any DVFS on the GPU at all, it
>     > >> just runs at fixed frequency.
>     > >
>     > > I see the GPU_REF_CTRL register 13:8 is a divisor, is this for all
>     > > "gpu"/"gpu_pp0"/"gpu_pp1" clk rating? If so, can we use it to
>     > > dynamically change the GPU clk freq because other SoC also use system
>     > > clock to do GPU DVFS, see sun8i-h3.dtsi. If we can't then zynqmp won't
>     > > finish
>     > > lima_devfreq_init() and get here at all because it does not have an
>     > > OPP table.
>     > >
>     >
>     > Jianqiang: Please take a look at this from zynqmp point of view.
>
>     Hi Mads, can you comment on this?
>
>     >
>     > Thanks,
>     > Michal
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
index 8989e215dfc9..533b36932f79 100644
--- a/drivers/gpu/drm/lima/lima_devfreq.c
+++ b/drivers/gpu/drm/lima/lima_devfreq.c
@@ -58,7 +58,7 @@  static int lima_devfreq_get_dev_status(struct device *dev,
 	struct lima_devfreq *devfreq = &ldev->devfreq;
 	unsigned long irqflags;
 
-	status->current_frequency = clk_get_rate(ldev->clk_gpu);
+	status->current_frequency = clk_get_rate(devfreq->clk_gpu);
 
 	spin_lock_irqsave(&devfreq->lock, irqflags);
 
@@ -110,12 +110,23 @@  int lima_devfreq_init(struct lima_device *ldev)
 	struct lima_devfreq *ldevfreq = &ldev->devfreq;
 	struct dev_pm_opp *opp;
 	unsigned long cur_freq;
-	int ret;
+	int i, ret;
 
 	if (!device_property_present(dev, "operating-points-v2"))
 		/* Optional, continue without devfreq */
 		return 0;
 
+	/* Find first clock which are not "bus" clock */
+	for (i = 0; i < ldev->nr_clks; i++) {
+		if (!strcmp(ldev->clks[i].id, "bus"))
+			continue;
+		ldevfreq->clk_gpu = ldev->clks[i].clk;
+		break;
+	}
+
+	if (!ldevfreq->clk_gpu)
+		return -ENODEV;
+
 	spin_lock_init(&ldevfreq->lock);
 
 	ret = devm_pm_opp_set_clkname(dev, "core");
@@ -135,7 +146,7 @@  int lima_devfreq_init(struct lima_device *ldev)
 
 	lima_devfreq_reset(ldevfreq);
 
-	cur_freq = clk_get_rate(ldev->clk_gpu);
+	cur_freq = clk_get_rate(ldevfreq->clk_gpu);
 
 	opp = devfreq_recommended_opp(dev, &cur_freq, 0);
 	if (IS_ERR(opp))
diff --git a/drivers/gpu/drm/lima/lima_devfreq.h b/drivers/gpu/drm/lima/lima_devfreq.h
index b8e50feaeab6..ffef5c91795d 100644
--- a/drivers/gpu/drm/lima/lima_devfreq.h
+++ b/drivers/gpu/drm/lima/lima_devfreq.h
@@ -17,6 +17,7 @@  struct lima_devfreq {
 	struct devfreq *devfreq;
 	struct thermal_cooling_device *cooling;
 	struct devfreq_simple_ondemand_data gov_data;
+	struct clk *clk_gpu;
 
 	ktime_t busy_time;
 	ktime_t idle_time;
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
index 65fdca366e41..9f7bde7e9d22 100644
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -85,29 +85,23 @@  static int lima_clk_enable(struct lima_device *dev)
 {
 	int err;
 
-	err = clk_prepare_enable(dev->clk_bus);
+	err = clk_bulk_prepare_enable(dev->nr_clks, dev->clks);
 	if (err)
 		return err;
 
-	err = clk_prepare_enable(dev->clk_gpu);
-	if (err)
-		goto error_out0;
-
 	if (dev->reset) {
 		err = reset_control_deassert(dev->reset);
 		if (err) {
 			dev_err(dev->dev,
 				"reset controller deassert failed %d\n", err);
-			goto error_out1;
+			goto error;
 		}
 	}
 
 	return 0;
 
-error_out1:
-	clk_disable_unprepare(dev->clk_gpu);
-error_out0:
-	clk_disable_unprepare(dev->clk_bus);
+error:
+	clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
 	return err;
 }
 
@@ -115,31 +109,23 @@  static void lima_clk_disable(struct lima_device *dev)
 {
 	if (dev->reset)
 		reset_control_assert(dev->reset);
-	clk_disable_unprepare(dev->clk_gpu);
-	clk_disable_unprepare(dev->clk_bus);
+	clk_bulk_disable_unprepare(dev->nr_clks, dev->clks);
 }
 
 static int lima_clk_init(struct lima_device *dev)
 {
 	int err;
 
-	dev->clk_bus = devm_clk_get(dev->dev, "bus");
-	if (IS_ERR(dev->clk_bus)) {
-		err = PTR_ERR(dev->clk_bus);
+	err = devm_clk_bulk_get_all(dev->dev, &dev->clks);
+	if (err < 1) {
+		if (err == 0)	/* No clock at all is an error too */
+			err = -ENODEV;
 		if (err != -EPROBE_DEFER)
-			dev_err(dev->dev, "get bus clk failed %d\n", err);
-		dev->clk_bus = NULL;
+			dev_err(dev->dev, "get clk failed %d\n", err);
 		return err;
 	}
 
-	dev->clk_gpu = devm_clk_get(dev->dev, "core");
-	if (IS_ERR(dev->clk_gpu)) {
-		err = PTR_ERR(dev->clk_gpu);
-		if (err != -EPROBE_DEFER)
-			dev_err(dev->dev, "get core clk failed %d\n", err);
-		dev->clk_gpu = NULL;
-		return err;
-	}
+	dev->nr_clks = err;
 
 	dev->reset = devm_reset_control_array_get_optional_shared(dev->dev);
 	if (IS_ERR(dev->reset)) {
@@ -412,8 +398,10 @@  int lima_device_init(struct lima_device *ldev)
 	INIT_LIST_HEAD(&ldev->error_task_list);
 	mutex_init(&ldev->error_task_list_lock);
 
-	dev_info(ldev->dev, "bus rate = %lu\n", clk_get_rate(ldev->clk_bus));
-	dev_info(ldev->dev, "mod rate = %lu", clk_get_rate(ldev->clk_gpu));
+	for (i = 0; i < ldev->nr_clks; i++) {
+		dev_info(ldev->dev, "clk %s = %lu Hz\n", ldev->clks[i].id,
+			 clk_get_rate(ldev->clks[i].clk));
+	}
 
 	return 0;
 
diff --git a/drivers/gpu/drm/lima/lima_device.h b/drivers/gpu/drm/lima/lima_device.h
index 41b9d7b4bcc7..de11570c9903 100644
--- a/drivers/gpu/drm/lima/lima_device.h
+++ b/drivers/gpu/drm/lima/lima_device.h
@@ -85,8 +85,8 @@  struct lima_device {
 	int num_pp;
 
 	void __iomem *iomem;
-	struct clk *clk_bus;
-	struct clk *clk_gpu;
+	struct clk_bulk_data *clks;
+	int nr_clks;
 	struct reset_control *reset;
 	struct regulator *regulator;