diff mbox series

[2/2] mailbox: mtk-cmdq: Move pm_runimte_get and put to mbox_chan_ops API

Message ID 20240614040133.24967-3-jason-jh.lin@mediatek.com (mailing list archive)
State New
Headers show
Series Fix sleeping function called from invalid context | expand

Commit Message

Jason-JH Lin (林睿祥) June 14, 2024, 4:01 a.m. UTC
When we run kernel with lockdebug option, we will get the BUG below:
  BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1164
  in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 3616, name: kworker/u17:3
    preempt_count: 1, expected: 0
    RCU nest depth: 0, expected: 0
    INFO: lockdep is turned off.
    irq event stamp: 0
    CPU: 1 PID: 3616 Comm: kworker/u17:3 Not tainted 6.1.87-lockdep-14133-g26e933aca785 #1
    Hardware name: Google Ciri sku0/unprovisioned board (DT)
    Workqueue: imgsys_runner imgsys_runner_func
    Call trace:
     dump_backtrace+0x100/0x120
     show_stack+0x20/0x2c
     dump_stack_lvl+0x84/0xb4
     dump_stack+0x18/0x48
     __might_resched+0x354/0x4c0
     __might_sleep+0x98/0xe4
     __pm_runtime_resume+0x70/0x124
     cmdq_mbox_send_data+0xe4/0xb1c
     msg_submit+0x194/0x2dc
     mbox_send_message+0x190/0x330
     imgsys_cmdq_sendtask+0x1618/0x2224
     imgsys_runner_func+0xac/0x11c
     process_one_work+0x638/0xf84
     worker_thread+0x808/0xcd0
     kthread+0x24c/0x324
     ret_from_fork+0x10/0x20

We found that there is a spin_lock_irqsave protection in msg_submit()
of mailbox.c and it is in the atomic context.
So when cmdq driver calls pm_runtime_get_sync() in cmdq_mbox_send_data(),
it will get this BUG report.

To avoid using sleep in atomic context, move pm_runtime_get_sync to
mbox_chan_ops->power_get and also move pm_runtime_put_autosuspend to
mbox_chan_ops->power_put.

Fixes: 8afe816b0c99 ("mailbox: mtk-cmdq-mailbox: Implement Runtime PM with autosuspend")
Signed-off-by: Jason-JH.Lin <jason-jh.lin@mediatek.com>
---
 drivers/mailbox/mtk-cmdq-mailbox.c | 37 +++++++++++++++---------------
 1 file changed, 18 insertions(+), 19 deletions(-)

Comments

Jassi Brar June 17, 2024, 6:18 p.m. UTC | #1
On Thu, Jun 13, 2024 at 11:01 PM Jason-JH.Lin <jason-jh.lin@mediatek.com> wrote:
>
> When we run kernel with lockdebug option, we will get the BUG below:
>   BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1164
>   in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 3616, name: kworker/u17:3
>     preempt_count: 1, expected: 0
>     RCU nest depth: 0, expected: 0
>     INFO: lockdep is turned off.
>     irq event stamp: 0
>     CPU: 1 PID: 3616 Comm: kworker/u17:3 Not tainted 6.1.87-lockdep-14133-g26e933aca785 #1
>     Hardware name: Google Ciri sku0/unprovisioned board (DT)
>     Workqueue: imgsys_runner imgsys_runner_func
>     Call trace:
>      dump_backtrace+0x100/0x120
>      show_stack+0x20/0x2c
>      dump_stack_lvl+0x84/0xb4
>      dump_stack+0x18/0x48
>      __might_resched+0x354/0x4c0
>      __might_sleep+0x98/0xe4
>      __pm_runtime_resume+0x70/0x124
>      cmdq_mbox_send_data+0xe4/0xb1c
>      msg_submit+0x194/0x2dc
>      mbox_send_message+0x190/0x330
>      imgsys_cmdq_sendtask+0x1618/0x2224
>      imgsys_runner_func+0xac/0x11c
>      process_one_work+0x638/0xf84
>      worker_thread+0x808/0xcd0
>      kthread+0x24c/0x324
>      ret_from_fork+0x10/0x20
>
> We found that there is a spin_lock_irqsave protection in msg_submit()
> of mailbox.c and it is in the atomic context.
> So when cmdq driver calls pm_runtime_get_sync() in cmdq_mbox_send_data(),
> it will get this BUG report.
>
> To avoid using sleep in atomic context, move pm_runtime_get_sync to
> mbox_chan_ops->power_get and also move pm_runtime_put_autosuspend to
> mbox_chan_ops->power_put.
>
> Fixes: 8afe816b0c99 ("mailbox: mtk-cmdq-mailbox: Implement Runtime PM with autosuspend")

Can you please share the pattern of mailbox transfers on your platform?
As in, how often and long are the "channel idle" periods? And when
active, how many transfers/sec ?
I see every TX is acked by one RX packet. How long is the typical gap
between a TX and its ack?

Thanks
Jason-JH Lin (林睿祥) June 18, 2024, 8:42 a.m. UTC | #2
Hi Jassi,

On Mon, 2024-06-17 at 13:18 -0500, Jassi Brar wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  On Thu, Jun 13, 2024 at 11:01 PM Jason-JH.Lin <
> jason-jh.lin@mediatek.com> wrote:
> >
> > When we run kernel with lockdebug option, we will get the BUG
> below:
> >   BUG: sleeping function called from invalid context at
> drivers/base/power/runtime.c:1164
> >   in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 3616,
> name: kworker/u17:3
> >     preempt_count: 1, expected: 0
> >     RCU nest depth: 0, expected: 0
> >     INFO: lockdep is turned off.
> >     irq event stamp: 0
> >     CPU: 1 PID: 3616 Comm: kworker/u17:3 Not tainted 6.1.87-
> lockdep-14133-g26e933aca785 #1
> >     Hardware name: Google Ciri sku0/unprovisioned board (DT)
> >     Workqueue: imgsys_runner imgsys_runner_func
> >     Call trace:
> >      dump_backtrace+0x100/0x120
> >      show_stack+0x20/0x2c
> >      dump_stack_lvl+0x84/0xb4
> >      dump_stack+0x18/0x48
> >      __might_resched+0x354/0x4c0
> >      __might_sleep+0x98/0xe4
> >      __pm_runtime_resume+0x70/0x124
> >      cmdq_mbox_send_data+0xe4/0xb1c
> >      msg_submit+0x194/0x2dc
> >      mbox_send_message+0x190/0x330
> >      imgsys_cmdq_sendtask+0x1618/0x2224
> >      imgsys_runner_func+0xac/0x11c
> >      process_one_work+0x638/0xf84
> >      worker_thread+0x808/0xcd0
> >      kthread+0x24c/0x324
> >      ret_from_fork+0x10/0x20
> >
> > We found that there is a spin_lock_irqsave protection in
> msg_submit()
> > of mailbox.c and it is in the atomic context.
> > So when cmdq driver calls pm_runtime_get_sync() in
> cmdq_mbox_send_data(),
> > it will get this BUG report.
> >
> > To avoid using sleep in atomic context, move pm_runtime_get_sync to
> > mbox_chan_ops->power_get and also move pm_runtime_put_autosuspend
> to
> > mbox_chan_ops->power_put.
> >
> > Fixes: 8afe816b0c99 ("mailbox: mtk-cmdq-mailbox: Implement Runtime
> PM with autosuspend")
> 
> Can you please share the pattern of mailbox transfers on your
> platform?

I'm not familiar with imgsys driver, so I provide the mbox transfer
pattern of DRM driver in this scenario:

1. mbox_request_channel() in mtk_crtc.c: mtk_crtc_create()
    mtk_crtc->cmdq_client.client.dev = mtk_crtc->mmsys_dev;
    mtk_crtc->cmdq_client.client.tx_block = false;
    mtk_crtc->cmdq_client.client.knows_txdone = true;
    mtk_crtc->cmdq_client.client.rx_callback = ddp_cmdq_cb;
    mtk_crtc->cmdq_client.chan = mbox_request_channel(&client, i);

2. mbox_send_message() in mtk_crtc.c: mtk_crtc_update_config()
    mbox_flush(cmdq_client.chan, 2000);
    // prepare cmd to a buffer in cmdq_handle structure
    mbox_send_message(cmdq_client.chan, cmdq_handle);
    mbox_client_txdone(cmdq_client.chan, 0);

3. mbox_chan_received_data() in mtk-cmdq-mailbox.c: cmdq_task_done()
   // When CMDQ driver get an GCE IRQ from hardware means all cmd in
cmd buffer are executed

> As in, how often and long are the "channel idle" periods? And when
> active, how many transfers/sec ?
Is there any debug logs in mailbox can measure this?

This mailbox channel is use to configure display hardware in every
VSYNC, so the channel idle periods may be less than 16.66ms.

It should call rx_callback() before VACTIVE, but sometimes it will be
dropped by mbox_flush() if the new message is coming.

> I see every TX is acked by one RX packet. How long is the typical gap
> between a TX and its ack?

Typical gap between a TX and its ack is less than 16.66ms.


I'll address to imgsys part after confirming with imgsys owner.

Regards,
Jason-JH Lin
> 
> Thanks
Jassi Brar June 18, 2024, 3:59 p.m. UTC | #3
On Tue, Jun 18, 2024 at 3:42 AM Jason-JH Lin (林睿祥)
<Jason-JH.Lin@mediatek.com> wrote:
>
> On Mon, 2024-06-17 at 13:18 -0500, Jassi Brar wrote:
> >
......

> > As in, how often and long are the "channel idle" periods? And when
> > active, how many transfers/sec ?
> Is there any debug logs in mailbox can measure this?
>
> This mailbox channel is use to configure display hardware in every
> VSYNC, so the channel idle periods may be less than 16.66ms.
>
> It should call rx_callback() before VACTIVE, but sometimes it will be
> dropped by mbox_flush() if the new message is coming.
>
> > I see every TX is acked by one RX packet. How long is the typical gap
> > between a TX and its ack?
>
> Typical gap between a TX and its ack is less than 16.66ms.
>
So basically the usecase is driving some display at 60Hz. And I
believe the display is driven
continuously for at least some minutes ?
If so, RPM between xfers is not really in effect because the
autosuspend delay is 100ms while you
get() it every 16.66ms. So all that is happening is some variables
changed under a spinlock.
I think you should consider get/put RPM in cmdq_mbox_startup() and
cmdq_mbox_shutdown().

Thanks
AngeloGioacchino Del Regno June 19, 2024, 8:18 a.m. UTC | #4
Il 18/06/24 17:59, Jassi Brar ha scritto:
> On Tue, Jun 18, 2024 at 3:42 AM Jason-JH Lin (林睿祥)
> <Jason-JH.Lin@mediatek.com> wrote:
>>
>> On Mon, 2024-06-17 at 13:18 -0500, Jassi Brar wrote:
>>>
> ......
> 
>>> As in, how often and long are the "channel idle" periods? And when
>>> active, how many transfers/sec ?
>> Is there any debug logs in mailbox can measure this?
>>
>> This mailbox channel is use to configure display hardware in every
>> VSYNC, so the channel idle periods may be less than 16.66ms.
>>
>> It should call rx_callback() before VACTIVE, but sometimes it will be
>> dropped by mbox_flush() if the new message is coming.
>>
>>> I see every TX is acked by one RX packet. How long is the typical gap
>>> between a TX and its ack?
>>
>> Typical gap between a TX and its ack is less than 16.66ms.
>>
> So basically the usecase is driving some display at 60Hz. And I
> believe the display is driven
> continuously for at least some minutes ?
> If so, RPM between xfers is not really in effect because the
> autosuspend delay is 100ms while you
> get() it every 16.66ms. So all that is happening is some variables
> changed under a spinlock.
> I think you should consider get/put RPM in cmdq_mbox_startup() and
> cmdq_mbox_shutdown().
> 
> Thanks

With at least MediaTek (but surely more than just mtk), a system that is idling
while having display ON doesn't mean that the display is continuously refreshed
by the CPU and with CMDQ messaging.

For example, when static content is displayed on screen, the CMDQ mailbox never
gets shut down, but no communication happens for a relatively long time; the
overhead of actually shutting down the mailbox and setting it back up would be
increasing latency in an unacceptable manner.

This is why I opted for autosuspend - it's only bringing down certain clocks for
the CMDQ HW, adding up a bit of power saving to the mix which, for some use cases
such as mobile devices with relatively small batteries, is definitely important.

I'll also briefly (and only briefly) mention that 120Hz displays are already a
common thing and in this case the gap between TX and ACK is ~8.33ms instead, let
alone that displays with a framerate of more than 120Hz also do exist even though
they're less common.

All of the above describes a few of the reasons why autosuspend is a good choice
here, instead of a shutdown->startup flow.
And again - I can place some bets that PM would also be applicable to SoCs from
other vendors as well, with most probably different benefits (but still with some
power related benefits!) compared to MediaTek.

....And that's the reason why I think that implementing a way to cleanly perform
this kind-of-aggressive power management in mailboxes is something that needs to
be done.

Cheers,
Angelo
Jassi Brar June 19, 2024, 3:38 p.m. UTC | #5
On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com> wrote:
> Il 18/06/24 17:59, Jassi Brar ha scritto:
.....

> For example, when static content is displayed on screen, the CMDQ mailbox never
> gets shut down, but no communication happens for a relatively long time; the
> overhead of actually shutting down the mailbox and setting it back up would be
> increasing latency in an unacceptable manner.
>
Hmm...  in your driver,  startup() is _empty_   and  shutdown() is all
under a spin-lock with irqs disabled, so that too shouldn't be
expensive. Right?
Then what causes unacceptable latencies?

> This is why I opted for autosuspend - it's only bringing down certain clocks for
> the CMDQ HW, adding up a bit of power saving to the mix which, for some use cases
> such as mobile devices with relatively small batteries, is definitely important.
>
> I'll also briefly (and only briefly) mention that 120Hz displays are already a
> common thing and in this case the gap between TX and ACK is ~8.33ms instead, let
> alone that displays with a framerate of more than 120Hz also do exist even though
> they're less common.
>
I don't know how even busier channels help your point.

> All of the above describes a few of the reasons why autosuspend is a good choice
> here, instead of a shutdown->startup flow.
> And again - I can place some bets that PM would also be applicable to SoCs from
> other vendors as well, with most probably different benefits (but still with some
> power related benefits!) compared to MediaTek.
>
Sure, if some platform _actually_ has a high channel
startup()/shutdown() cost, it may need finer PM control. And I have a
way for that, but even with that I am sure someone from MTK will
realize they didn't need that.

Thanks.
Jason-JH Lin (林睿祥) June 20, 2024, 6:32 a.m. UTC | #6
On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
> <angelogioacchino.delregno@collabora.com> wrote:
> > Il 18/06/24 17:59, Jassi Brar ha scritto:
> .....
> 
> > For example, when static content is displayed on screen, the CMDQ
> mailbox never
> > gets shut down, but no communication happens for a relatively long
> time; the
> > overhead of actually shutting down the mailbox and setting it back
> up would be
> > increasing latency in an unacceptable manner.
> >
> Hmm...  in your driver,  startup() is _empty_   and  shutdown() is
> all
> under a spin-lock with irqs disabled, so that too shouldn't be
> expensive. Right?
> Then what causes unacceptable latencies?
> 

I found that the BUG report only occurred when opening the camera APP.
Maybe something in imgsys_cmdq_sendtask() is too expensive or somewhere
else in imgsys driver.

I'm wondering why this BUG report is not occurred in display use case
which is more frequent than imgsys use case.
Does this mean sleep() is not always called in pm_runtime_get_sync()
and if we can guarantee this sleep() won't be called, then using
pm_runtime_get_sync() in atomic context is OK?

> > This is why I opted for autosuspend - it's only bringing down
> certain clocks for
> > the CMDQ HW, adding up a bit of power saving to the mix which, for
> some use cases
> > such as mobile devices with relatively small batteries, is
> definitely important.
> >
> > I'll also briefly (and only briefly) mention that 120Hz displays
> are already a
> > common thing and in this case the gap between TX and ACK is ~8.33ms
> instead, let
> > alone that displays with a framerate of more than 120Hz also do
> exist even though
> > they're less common.
> >
> I don't know how even busier channels help your point.
> 
> > All of the above describes a few of the reasons why autosuspend is
> a good choice
> > here, instead of a shutdown->startup flow.
> > And again - I can place some bets that PM would also be applicable
> to SoCs from
> > other vendors as well, with most probably different benefits (but
> still with some
> > power related benefits!) compared to MediaTek.

I agree with Angelo's point!


Regard,
Jason-JH Lin

> >
> Sure, if some platform _actually_ has a high channel
> startup()/shutdown() cost, it may need finer PM control. And I have a
> way for that, but even with that I am sure someone from MTK will
> realize they didn't need that.
> 
> Thanks.
>
Jassi Brar June 20, 2024, 2:39 p.m. UTC | #7
On Thu, Jun 20, 2024 at 1:33 AM Jason-JH Lin (林睿祥)
<Jason-JH.Lin@mediatek.com> wrote:
>
> On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
> >
> > External email : Please do not click links or open attachments until
> > you have verified the sender or the content.
> >  On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
> > <angelogioacchino.delregno@collabora.com> wrote:
> > > Il 18/06/24 17:59, Jassi Brar ha scritto:
> > .....
> >
> > > For example, when static content is displayed on screen, the CMDQ
> > mailbox never
> > > gets shut down, but no communication happens for a relatively long
> > time; the
> > > overhead of actually shutting down the mailbox and setting it back
> > up would be
> > > increasing latency in an unacceptable manner.
> > >
> > Hmm...  in your driver,  startup() is _empty_   and  shutdown() is
> > all
> > under a spin-lock with irqs disabled, so that too shouldn't be
> > expensive. Right?
> > Then what causes unacceptable latencies?
> >
>
> I found that the BUG report only occurred when opening the camera APP.
> Maybe something in imgsys_cmdq_sendtask() is too expensive or somewhere
> else in imgsys driver.
>
If you move anything from submit() into startup(), which is once per
lifetime of a channel, it will only make imgsys_cmdq_sendtask()
cheaper.
Btw, the imgsys code is not public, I don't know how it looks.


> I'm wondering why this BUG report is not occurred in display use case
> which is more frequent than imgsys use case.
> Does this mean sleep() is not always called in pm_runtime_get_sync()
> and if we can guarantee this sleep() won't be called, then using
> pm_runtime_get_sync() in atomic context is OK?
>
Instead of hacking around, maybe try using startup() and shutdown()
which is for such uses? Maybe request/release channel as part of RPM
in your client driver if you are worried about the noise?


> > > This is why I opted for autosuspend - it's only bringing down
> > certain clocks for
> > > the CMDQ HW, adding up a bit of power saving to the mix which, for
> > some use cases
> > > such as mobile devices with relatively small batteries, is
> > definitely important.
> > >
> > > I'll also briefly (and only briefly) mention that 120Hz displays
> > are already a
> > > common thing and in this case the gap between TX and ACK is ~8.33ms
> > instead, let
> > > alone that displays with a framerate of more than 120Hz also do
> > exist even though
> > > they're less common.
> > >
> > I don't know how even busier channels help your point.
> >
> > > All of the above describes a few of the reasons why autosuspend is
> > a good choice
> > > here, instead of a shutdown->startup flow.
> > > And again - I can place some bets that PM would also be applicable
> > to SoCs from
> > > other vendors as well, with most probably different benefits (but
> > still with some
> > > power related benefits!) compared to MediaTek.
>
> I agree with Angelo's point!
>
Ok, but you or Angelo still don't explain "unacceptable latencies"
when your startup() and shutdown() are empty. You just want api
changed, which we can but at least do you part and tell me where the
bottleneck (unexpected latency) comes from.

Thanks
AngeloGioacchino Del Regno June 24, 2024, 11:29 a.m. UTC | #8
Il 20/06/24 16:39, Jassi Brar ha scritto:
> On Thu, Jun 20, 2024 at 1:33 AM Jason-JH Lin (林睿祥)
> <Jason-JH.Lin@mediatek.com> wrote:
>>
>> On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
>>>
>>> External email : Please do not click links or open attachments until
>>> you have verified the sender or the content.
>>>   On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
>>> <angelogioacchino.delregno@collabora.com> wrote:
>>>> Il 18/06/24 17:59, Jassi Brar ha scritto:
>>> .....
>>>
>>>> For example, when static content is displayed on screen, the CMDQ
>>> mailbox never
>>>> gets shut down, but no communication happens for a relatively long
>>> time; the
>>>> overhead of actually shutting down the mailbox and setting it back
>>> up would be
>>>> increasing latency in an unacceptable manner.
>>>>
>>> Hmm...  in your driver,  startup() is _empty_   and  shutdown() is
>>> all
>>> under a spin-lock with irqs disabled, so that too shouldn't be
>>> expensive. Right?
>>> Then what causes unacceptable latencies?
>>>
>>
>> I found that the BUG report only occurred when opening the camera APP.
>> Maybe something in imgsys_cmdq_sendtask() is too expensive or somewhere
>> else in imgsys driver.
>>
> If you move anything from submit() into startup(), which is once per
> lifetime of a channel, it will only make imgsys_cmdq_sendtask()
> cheaper.
> Btw, the imgsys code is not public, I don't know how it looks.
> 
> 
>> I'm wondering why this BUG report is not occurred in display use case
>> which is more frequent than imgsys use case.
>> Does this mean sleep() is not always called in pm_runtime_get_sync()
>> and if we can guarantee this sleep() won't be called, then using
>> pm_runtime_get_sync() in atomic context is OK?
>>
> Instead of hacking around, maybe try using startup() and shutdown()
> which is for such uses? Maybe request/release channel as part of RPM
> in your client driver if you are worried about the noise?
> 
> 
>>>> This is why I opted for autosuspend - it's only bringing down
>>> certain clocks for
>>>> the CMDQ HW, adding up a bit of power saving to the mix which, for
>>> some use cases
>>>> such as mobile devices with relatively small batteries, is
>>> definitely important.
>>>>
>>>> I'll also briefly (and only briefly) mention that 120Hz displays
>>> are already a
>>>> common thing and in this case the gap between TX and ACK is ~8.33ms
>>> instead, let
>>>> alone that displays with a framerate of more than 120Hz also do
>>> exist even though
>>>> they're less common.
>>>>
>>> I don't know how even busier channels help your point.
>>>
>>>> All of the above describes a few of the reasons why autosuspend is
>>> a good choice
>>>> here, instead of a shutdown->startup flow.
>>>> And again - I can place some bets that PM would also be applicable
>>> to SoCs from
>>>> other vendors as well, with most probably different benefits (but
>>> still with some
>>>> power related benefits!) compared to MediaTek.
>>
>> I agree with Angelo's point!
>>
> Ok, but you or Angelo still don't explain "unacceptable latencies"
> when your startup() and shutdown() are empty. You just want api
> changed, which we can but at least do you part and tell me where the
> bottleneck (unexpected latency) comes from.
> 

"I want numbers" is a sensible request, honestly I would do the same so I totally
understand that.

Jason, can you please perform latency measurements on 60Hz and *especially* ISP/cam
cases while "continuously" calling startup() and shutdown() for every power saving
operation?


Thanks,
Angelo
Jassi Brar June 24, 2024, 5:45 p.m. UTC | #9
On Mon, Jun 24, 2024 at 6:29 AM AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com> wrote:
>
> Il 20/06/24 16:39, Jassi Brar ha scritto:
> > On Thu, Jun 20, 2024 at 1:33 AM Jason-JH Lin (林睿祥)
> > <Jason-JH.Lin@mediatek.com> wrote:
> >>
> >> On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
> >>>
> >>> External email : Please do not click links or open attachments until
> >>> you have verified the sender or the content.
> >>>   On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
> >>> <angelogioacchino.delregno@collabora.com> wrote:
> >>>> Il 18/06/24 17:59, Jassi Brar ha scritto:
> >>> .....
> >>>
> >>>> For example, when static content is displayed on screen, the CMDQ
> >>> mailbox never
> >>>> gets shut down, but no communication happens for a relatively long
> >>> time; the
> >>>> overhead of actually shutting down the mailbox and setting it back
> >>> up would be
> >>>> increasing latency in an unacceptable manner.
> >>>>
> >>> Hmm...  in your driver,  startup() is _empty_   and  shutdown() is
> >>> all
> >>> under a spin-lock with irqs disabled, so that too shouldn't be
> >>> expensive. Right?
> >>> Then what causes unacceptable latencies?
> >>>
> >>
> >> I found that the BUG report only occurred when opening the camera APP.
> >> Maybe something in imgsys_cmdq_sendtask() is too expensive or somewhere
> >> else in imgsys driver.
> >>
> > If you move anything from submit() into startup(), which is once per
> > lifetime of a channel, it will only make imgsys_cmdq_sendtask()
> > cheaper.
> > Btw, the imgsys code is not public, I don't know how it looks.
> >
> >
> >> I'm wondering why this BUG report is not occurred in display use case
> >> which is more frequent than imgsys use case.
> >> Does this mean sleep() is not always called in pm_runtime_get_sync()
> >> and if we can guarantee this sleep() won't be called, then using
> >> pm_runtime_get_sync() in atomic context is OK?
> >>
> > Instead of hacking around, maybe try using startup() and shutdown()
> > which is for such uses? Maybe request/release channel as part of RPM
> > in your client driver if you are worried about the noise?
> >
> >
> >>>> This is why I opted for autosuspend - it's only bringing down
> >>> certain clocks for
> >>>> the CMDQ HW, adding up a bit of power saving to the mix which, for
> >>> some use cases
> >>>> such as mobile devices with relatively small batteries, is
> >>> definitely important.
> >>>>
> >>>> I'll also briefly (and only briefly) mention that 120Hz displays
> >>> are already a
> >>>> common thing and in this case the gap between TX and ACK is ~8.33ms
> >>> instead, let
> >>>> alone that displays with a framerate of more than 120Hz also do
> >>> exist even though
> >>>> they're less common.
> >>>>
> >>> I don't know how even busier channels help your point.
> >>>
> >>>> All of the above describes a few of the reasons why autosuspend is
> >>> a good choice
> >>>> here, instead of a shutdown->startup flow.
> >>>> And again - I can place some bets that PM would also be applicable
> >>> to SoCs from
> >>>> other vendors as well, with most probably different benefits (but
> >>> still with some
> >>>> power related benefits!) compared to MediaTek.
> >>
> >> I agree with Angelo's point!
> >>
> > Ok, but you or Angelo still don't explain "unacceptable latencies"
> > when your startup() and shutdown() are empty. You just want api
> > changed, which we can but at least do you part and tell me where the
> > bottleneck (unexpected latency) comes from.
> >
>
> "I want numbers" is a sensible request, honestly I would do the same so I totally
> understand that.
>
> Jason, can you please perform latency measurements on 60Hz and *especially* ISP/cam
> cases while "continuously" calling startup() and shutdown() for every power saving
> operation?
>
To be clear, do with the mailbox channel that you do with the clocks
now, because your startup() is literally and shutdown() is practically
empty.
Call shutdown() when no request has come in for a while, so you know
the client has quiesced likely. So try putting request/release in
higher level RPM with autosuspend.

thanks
Jason-JH Lin (林睿祥) June 25, 2024, 3:40 a.m. UTC | #10
On Mon, 2024-06-24 at 12:45 -0500, Jassi Brar wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  On Mon, Jun 24, 2024 at 6:29 AM AngeloGioacchino Del Regno
> <angelogioacchino.delregno@collabora.com> wrote:
> >
> > Il 20/06/24 16:39, Jassi Brar ha scritto:
> > > On Thu, Jun 20, 2024 at 1:33 AM Jason-JH Lin (林睿祥)
> > > <Jason-JH.Lin@mediatek.com> wrote:
> > >>
> > >> On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
> > >>>
> > >>> External email : Please do not click links or open attachments
> until
> > >>> you have verified the sender or the content.
> > >>>   On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
> > >>> <angelogioacchino.delregno@collabora.com> wrote:
> > >>>> Il 18/06/24 17:59, Jassi Brar ha scritto:
> > >>> .....
> > >>>
> > >>>> For example, when static content is displayed on screen, the
> CMDQ
> > >>> mailbox never
> > >>>> gets shut down, but no communication happens for a relatively
> long
> > >>> time; the
> > >>>> overhead of actually shutting down the mailbox and setting it
> back
> > >>> up would be
> > >>>> increasing latency in an unacceptable manner.
> > >>>>
> > >>> Hmm...  in your driver,  startup() is _empty_   and  shutdown()
> is
> > >>> all
> > >>> under a spin-lock with irqs disabled, so that too shouldn't be
> > >>> expensive. Right?
> > >>> Then what causes unacceptable latencies?
> > >>>
> > >>
> > >> I found that the BUG report only occurred when opening the
> camera APP.
> > >> Maybe something in imgsys_cmdq_sendtask() is too expensive or
> somewhere
> > >> else in imgsys driver.
> > >>
> > > If you move anything from submit() into startup(), which is once
> per
> > > lifetime of a channel, it will only make imgsys_cmdq_sendtask()
> > > cheaper.
> > > Btw, the imgsys code is not public, I don't know how it looks.
> > >

It'll be upstreamed in the future, so I can only test it locally to
find the information we want.

> > >
> > >> I'm wondering why this BUG report is not occurred in display use
> case
> > >> which is more frequent than imgsys use case.
> > >> Does this mean sleep() is not always called in
> pm_runtime_get_sync()
> > >> and if we can guarantee this sleep() won't be called, then using
> > >> pm_runtime_get_sync() in atomic context is OK?
> > >>
> > > Instead of hacking around, maybe try using startup() and
> shutdown()
> > > which is for such uses? Maybe request/release channel as part of
> RPM
> > > in your client driver if you are worried about the noise?
> > >
> > >
> > >>>> This is why I opted for autosuspend - it's only bringing down
> > >>> certain clocks for
> > >>>> the CMDQ HW, adding up a bit of power saving to the mix which,
> for
> > >>> some use cases
> > >>>> such as mobile devices with relatively small batteries, is
> > >>> definitely important.
> > >>>>
> > >>>> I'll also briefly (and only briefly) mention that 120Hz
> displays
> > >>> are already a
> > >>>> common thing and in this case the gap between TX and ACK is
> ~8.33ms
> > >>> instead, let
> > >>>> alone that displays with a framerate of more than 120Hz also
> do
> > >>> exist even though
> > >>>> they're less common.
> > >>>>
> > >>> I don't know how even busier channels help your point.
> > >>>
> > >>>> All of the above describes a few of the reasons why
> autosuspend is
> > >>> a good choice
> > >>>> here, instead of a shutdown->startup flow.
> > >>>> And again - I can place some bets that PM would also be
> applicable
> > >>> to SoCs from
> > >>>> other vendors as well, with most probably different benefits
> (but
> > >>> still with some
> > >>>> power related benefits!) compared to MediaTek.
> > >>
> > >> I agree with Angelo's point!
> > >>
> > > Ok, but you or Angelo still don't explain "unacceptable
> latencies"
> > > when your startup() and shutdown() are empty. You just want api
> > > changed, which we can but at least do you part and tell me where
> the
> > > bottleneck (unexpected latency) comes from.
> > >
> >
> > "I want numbers" is a sensible request, honestly I would do the
> same so I totally
> > understand that.
> >
> > Jason, can you please perform latency measurements on 60Hz and
> *especially* ISP/cam
> > cases while "continuously" calling startup() and shutdown() for
> every power saving
> > operation?

You mean moving clk_builk_enable() and clk_bulk_disable() to startup()
and shutdown(), and then calling startup() in cmdq_runtime_resume() and
calling shutdown() in cmdq_runtime_suspend()?
If we do that, I think GCE clocks will never be closed because 
mbox_request_channel() and mbox_free_channel() will call startup() and
shutdown() already.

I found that display thread:0 will call mbox_send_message() every 16ms
when I moving UI continuously:
[   61.503604] thread_id:0, send_diff:16 ms
[   61.520266] thread_id:0, send_diff:16 ms
[   61.537143] thread_id:0, send_diff:16 ms
[   61.553609] thread_id:0, send_diff:16 ms
[   61.570811] thread_id:0, send_diff:17 ms
[   61.587075] thread_id:0, send_diff:16 ms
[   61.603527] thread_id:0, send_diff:16 ms
[   61.620082] thread_id:0, send_diff:16 ms
[   61.637496] thread_id:0, send_diff:17 ms
[   61.653876] thread_id:0, send_diff:16 ms
[   61.670162] thread_id:0, send_diff:16 ms
[   61.687318] thread_id:0, send_diff:17 ms
[   61.703747] thread_id:0, send_diff:16 ms
[   61.720656] thread_id:0, send_diff:16 ms
If I didn't do anything to UI, I won't see any mbox_send_message()
logs.

It means mbox_send_message() will only be called when the display is
updated:
[  782.777205] thread_id:0, send_diff:20072 ms
[  820.296738] thread_id:0, send_diff:37519 ms
[  863.034270] thread_id:0, send_diff:42737 ms
[  880.297759] thread_id:0, send_diff:17263 ms
[  917.795432] thread_id:0, send_diff:37497 ms


When opening camera APP, it seems to become 30fps:
[  120.551778] BUG: sleeping function called from invalid context at
drivers/base/power/runtime.c:1164
[  120.560844] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid:
201, name: kworker/u17:1
[  120.569363] preempt_count: 1, expected: 0
[  120.573367] RCU nest depth: 0, expected: 0
[  120.577457] INFO: lockdep is turned off.
[  120.581372] irq event stamp: 0
[  120.584420] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
[  120.590687] hardirqs last disabled at (0): [<ffffffd3624a86e0>]
copy_process+0x578/0xf10
[  120.598780] softirqs last  enabled at (0): [<ffffffd3624a86ec>]
copy_process+0x584/0xf10
[  120.606865] softirqs last disabled at (0): [<0000000000000000>] 0x0
[  120.613128] CPU: 0 PID: 201 Comm: kworker/u17:1 Not tainted 6.1.92-
lockdep-14419-g8c5f20e56598-dirty #1
e971112e83e796239a41e46e8747794c737fb18c
[  120.626073] Hardware name: Google Ciri sku0/unprovisioned board (DT)
[  120.632420] Workqueue: imgsys_runner imgsys_runner_func
[  120.637647] Call trace:
[  120.640086]  dump_backtrace+0x100/0x120
[  120.643919]  show_stack+0x20/0x2c
[  120.647230]  dump_stack_lvl+0x84/0xb4
[  120.650891]  dump_stack+0x18/0x44
[  120.654203]  __might_resched+0x204/0x214
[  120.658123]  __might_sleep+0x50/0x80
[  120.661692]  __pm_runtime_resume+0x48/0xbc
[  120.665786]  cmdq_mbox_send_data+0x8c/0x434
[  120.669966]  msg_submit+0x84/0x104
[  120.673362]  mbox_send_message+0xb8/0x124
[  120.677365]  imgsys_cmdq_sendtask+0x688/0x928
[  120.681717]  imgsys_runner_func+0x44/0x78
[  120.685722]  process_one_work+0x274/0x574
[  120.689731]  worker_thread+0x240/0x3f0
[  120.693475]  kthread+0xfc/0x11c
[  120.696612]  ret_from_fork+0x10/0x20
[  120.710570] mtk-cam raw 16030000.cam-raw-a: [SOF] frame done reading
lost frames. req(7),ts(120697780083)
[  120.720352] mtk-cam raw 16030000.cam-raw-a: [SOF] SCQ_DELAY state:4
ts:120697780083
[  120.990091] thread_id:0, send_diff:1470 ms
[  121.007210] thread_id:0, send_diff:17 ms
[  121.016484] thread_id:0, send_diff:9 ms
[  121.032652] thread_id:0, send_diff:16 ms
[  121.049366] thread_id:0, send_diff:16 ms
[  121.066880] thread_id:0, send_diff:17 ms
[  121.087023] thread_id:0, send_diff:20 ms
[  121.099325] thread_id:0, send_diff:12 ms
[  121.135286] thread_id:0, send_diff:35 ms
[  121.149349] thread_id:0, send_diff:14 ms
[  121.179259] thread_id:0, send_diff:29 ms
[  121.203183] thread_id:0, send_diff:23 ms
[  121.242234] thread_id:0, send_diff:39 ms
[  121.280645] thread_id:0, send_diff:38 ms
[  121.318651] thread_id:0, send_diff:38 ms
[  121.356366] thread_id:0, send_diff:37 ms
[  121.394484] thread_id:0, send_diff:38 ms

The cmdq_runtime_suspend() will only be called when I disabled camera
APP:
[  468.137530] pm_on ts:465259, pm_off ts:4681375, diff:2877 ms
and the diff is the duration from opening the camera APP to closing the
camera APP.

From booting to seeing the UI and any operation of UI, I didn't see
any cmdq_runtime_suspend() logs from display mbox channel.
So cmdq_runtime_resume() and cmdq_runtime_suspend() are never called by
display mbox channel.
This can tell why only opening the camera APP can see this BUG report.

> >
> To be clear, do with the mailbox channel that you do with the clocks
> now, because your startup() is literally and shutdown() is
> practically
> empty.
> Call shutdown() when no request has come in for a while, so you know
> the client has quiesced likely. So try putting request/release in
> higher level RPM with autosuspend.

I'm not quit sure how to modify this from current usage.
Do you mean:
1. Move pm_runtime_get_sync() from cmdq_mbox_semd_data() to startup()
and keep pm_runtime_put_autosuspend() in cmdq_mbox_semd_data()?

2. Call shutdown() in cmdq_runtime_suspend()?

Regards,
Jason-JH.Lin

> 
> thanks
Jassi Brar June 25, 2024, 4:21 a.m. UTC | #11
On Mon, Jun 24, 2024 at 10:41 PM Jason-JH Lin (林睿祥)
<Jason-JH.Lin@mediatek.com> wrote:
>
> On Mon, 2024-06-24 at 12:45 -0500, Jassi Brar wrote:
> >
> > External email : Please do not click links or open attachments until
> > you have verified the sender or the content.
> >  On Mon, Jun 24, 2024 at 6:29 AM AngeloGioacchino Del Regno
> > <angelogioacchino.delregno@collabora.com> wrote:
> > >
> > > Il 20/06/24 16:39, Jassi Brar ha scritto:
> > > > On Thu, Jun 20, 2024 at 1:33 AM Jason-JH Lin (林睿祥)
> > > > <Jason-JH.Lin@mediatek.com> wrote:
> > > >>
> > > >> On Wed, 2024-06-19 at 10:38 -0500, Jassi Brar wrote:
> > > >>>
> > > >>> External email : Please do not click links or open attachments
> > until
> > > >>> you have verified the sender or the content.
> > > >>>   On Wed, Jun 19, 2024 at 3:18 AM AngeloGioacchino Del Regno
> > > >>> <angelogioacchino.delregno@collabora.com> wrote:
> > > >>>> Il 18/06/24 17:59, Jassi Brar ha scritto:
> > > >>> .....
> > > >>>
> > > >>>> For example, when static content is displayed on screen, the
> > CMDQ
> > > >>> mailbox never
> > > >>>> gets shut down, but no communication happens for a relatively
> > long
> > > >>> time; the
> > > >>>> overhead of actually shutting down the mailbox and setting it
> > back
> > > >>> up would be
> > > >>>> increasing latency in an unacceptable manner.
> > > >>>>
> > > >>> Hmm...  in your driver,  startup() is _empty_   and  shutdown()
> > is
> > > >>> all
> > > >>> under a spin-lock with irqs disabled, so that too shouldn't be
> > > >>> expensive. Right?
> > > >>> Then what causes unacceptable latencies?
> > > >>>
> > > >>
> > > >> I found that the BUG report only occurred when opening the
> > camera APP.
> > > >> Maybe something in imgsys_cmdq_sendtask() is too expensive or
> > somewhere
> > > >> else in imgsys driver.
> > > >>
> > > > If you move anything from submit() into startup(), which is once
> > per
> > > > lifetime of a channel, it will only make imgsys_cmdq_sendtask()
> > > > cheaper.
> > > > Btw, the imgsys code is not public, I don't know how it looks.
> > > >
>
> It'll be upstreamed in the future, so I can only test it locally to
> find the information we want.
>
> > > >
> > > >> I'm wondering why this BUG report is not occurred in display use
> > case
> > > >> which is more frequent than imgsys use case.
> > > >> Does this mean sleep() is not always called in
> > pm_runtime_get_sync()
> > > >> and if we can guarantee this sleep() won't be called, then using
> > > >> pm_runtime_get_sync() in atomic context is OK?
> > > >>
> > > > Instead of hacking around, maybe try using startup() and
> > shutdown()
> > > > which is for such uses? Maybe request/release channel as part of
> > RPM
> > > > in your client driver if you are worried about the noise?
> > > >
> > > >
> > > >>>> This is why I opted for autosuspend - it's only bringing down
> > > >>> certain clocks for
> > > >>>> the CMDQ HW, adding up a bit of power saving to the mix which,
> > for
> > > >>> some use cases
> > > >>>> such as mobile devices with relatively small batteries, is
> > > >>> definitely important.
> > > >>>>
> > > >>>> I'll also briefly (and only briefly) mention that 120Hz
> > displays
> > > >>> are already a
> > > >>>> common thing and in this case the gap between TX and ACK is
> > ~8.33ms
> > > >>> instead, let
> > > >>>> alone that displays with a framerate of more than 120Hz also
> > do
> > > >>> exist even though
> > > >>>> they're less common.
> > > >>>>
> > > >>> I don't know how even busier channels help your point.
> > > >>>
> > > >>>> All of the above describes a few of the reasons why
> > autosuspend is
> > > >>> a good choice
> > > >>>> here, instead of a shutdown->startup flow.
> > > >>>> And again - I can place some bets that PM would also be
> > applicable
> > > >>> to SoCs from
> > > >>>> other vendors as well, with most probably different benefits
> > (but
> > > >>> still with some
> > > >>>> power related benefits!) compared to MediaTek.
> > > >>
> > > >> I agree with Angelo's point!
> > > >>
> > > > Ok, but you or Angelo still don't explain "unacceptable
> > latencies"
> > > > when your startup() and shutdown() are empty. You just want api
> > > > changed, which we can but at least do you part and tell me where
> > the
> > > > bottleneck (unexpected latency) comes from.
> > > >
> > >
> > > "I want numbers" is a sensible request, honestly I would do the
> > same so I totally
> > > understand that.
> > >
> > > Jason, can you please perform latency measurements on 60Hz and
> > *especially* ISP/cam
> > > cases while "continuously" calling startup() and shutdown() for
> > every power saving
> > > operation?
>
> You mean moving clk_builk_enable() and clk_bulk_disable() to startup()
> and shutdown(), and then calling startup() in cmdq_runtime_resume() and
> calling shutdown() in cmdq_runtime_suspend()?
> If we do that, I think GCE clocks will never be closed because
> mbox_request_channel() and mbox_free_channel() will call startup() and
> shutdown() already.
>
> I found that display thread:0 will call mbox_send_message() every 16ms
> when I moving UI continuously:
> [   61.503604] thread_id:0, send_diff:16 ms
> [   61.520266] thread_id:0, send_diff:16 ms
> [   61.537143] thread_id:0, send_diff:16 ms
> [   61.553609] thread_id:0, send_diff:16 ms
> [   61.570811] thread_id:0, send_diff:17 ms
> [   61.587075] thread_id:0, send_diff:16 ms
> [   61.603527] thread_id:0, send_diff:16 ms
> [   61.620082] thread_id:0, send_diff:16 ms
> [   61.637496] thread_id:0, send_diff:17 ms
> [   61.653876] thread_id:0, send_diff:16 ms
> [   61.670162] thread_id:0, send_diff:16 ms
> [   61.687318] thread_id:0, send_diff:17 ms
> [   61.703747] thread_id:0, send_diff:16 ms
> [   61.720656] thread_id:0, send_diff:16 ms
> If I didn't do anything to UI, I won't see any mbox_send_message()
> logs.
>
> It means mbox_send_message() will only be called when the display is
> updated:
> [  782.777205] thread_id:0, send_diff:20072 ms
> [  820.296738] thread_id:0, send_diff:37519 ms
> [  863.034270] thread_id:0, send_diff:42737 ms
> [  880.297759] thread_id:0, send_diff:17263 ms
> [  917.795432] thread_id:0, send_diff:37497 ms
>
>
> When opening camera APP, it seems to become 30fps:
> [  120.551778] BUG: sleeping function called from invalid context at
> drivers/base/power/runtime.c:1164
> [  120.560844] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid:
> 201, name: kworker/u17:1
> [  120.569363] preempt_count: 1, expected: 0
> [  120.573367] RCU nest depth: 0, expected: 0
> [  120.577457] INFO: lockdep is turned off.
> [  120.581372] irq event stamp: 0
> [  120.584420] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
> [  120.590687] hardirqs last disabled at (0): [<ffffffd3624a86e0>]
> copy_process+0x578/0xf10
> [  120.598780] softirqs last  enabled at (0): [<ffffffd3624a86ec>]
> copy_process+0x584/0xf10
> [  120.606865] softirqs last disabled at (0): [<0000000000000000>] 0x0
> [  120.613128] CPU: 0 PID: 201 Comm: kworker/u17:1 Not tainted 6.1.92-
> lockdep-14419-g8c5f20e56598-dirty #1
> e971112e83e796239a41e46e8747794c737fb18c
> [  120.626073] Hardware name: Google Ciri sku0/unprovisioned board (DT)
> [  120.632420] Workqueue: imgsys_runner imgsys_runner_func
> [  120.637647] Call trace:
> [  120.640086]  dump_backtrace+0x100/0x120
> [  120.643919]  show_stack+0x20/0x2c
> [  120.647230]  dump_stack_lvl+0x84/0xb4
> [  120.650891]  dump_stack+0x18/0x44
> [  120.654203]  __might_resched+0x204/0x214
> [  120.658123]  __might_sleep+0x50/0x80
> [  120.661692]  __pm_runtime_resume+0x48/0xbc
> [  120.665786]  cmdq_mbox_send_data+0x8c/0x434
> [  120.669966]  msg_submit+0x84/0x104
> [  120.673362]  mbox_send_message+0xb8/0x124
> [  120.677365]  imgsys_cmdq_sendtask+0x688/0x928
> [  120.681717]  imgsys_runner_func+0x44/0x78
> [  120.685722]  process_one_work+0x274/0x574
> [  120.689731]  worker_thread+0x240/0x3f0
> [  120.693475]  kthread+0xfc/0x11c
> [  120.696612]  ret_from_fork+0x10/0x20
> [  120.710570] mtk-cam raw 16030000.cam-raw-a: [SOF] frame done reading
> lost frames. req(7),ts(120697780083)
> [  120.720352] mtk-cam raw 16030000.cam-raw-a: [SOF] SCQ_DELAY state:4
> ts:120697780083
> [  120.990091] thread_id:0, send_diff:1470 ms
> [  121.007210] thread_id:0, send_diff:17 ms
> [  121.016484] thread_id:0, send_diff:9 ms
> [  121.032652] thread_id:0, send_diff:16 ms
> [  121.049366] thread_id:0, send_diff:16 ms
> [  121.066880] thread_id:0, send_diff:17 ms
> [  121.087023] thread_id:0, send_diff:20 ms
> [  121.099325] thread_id:0, send_diff:12 ms
> [  121.135286] thread_id:0, send_diff:35 ms
> [  121.149349] thread_id:0, send_diff:14 ms
> [  121.179259] thread_id:0, send_diff:29 ms
> [  121.203183] thread_id:0, send_diff:23 ms
> [  121.242234] thread_id:0, send_diff:39 ms
> [  121.280645] thread_id:0, send_diff:38 ms
> [  121.318651] thread_id:0, send_diff:38 ms
> [  121.356366] thread_id:0, send_diff:37 ms
> [  121.394484] thread_id:0, send_diff:38 ms
>
> The cmdq_runtime_suspend() will only be called when I disabled camera
> APP:
> [  468.137530] pm_on ts:465259, pm_off ts:4681375, diff:2877 ms
> and the diff is the duration from opening the camera APP to closing the
> camera APP.
>
> From booting to seeing the UI and any operation of UI, I didn't see
> any cmdq_runtime_suspend() logs from display mbox channel.
> So cmdq_runtime_resume() and cmdq_runtime_suspend() are never called by
> display mbox channel.
> This can tell why only opening the camera APP can see this BUG report.
>
> > >
> > To be clear, do with the mailbox channel that you do with the clocks
> > now, because your startup() is literally and shutdown() is
> > practically
> > empty.
> > Call shutdown() when no request has come in for a while, so you know
> > the client has quiesced likely. So try putting request/release in
> > higher level RPM with autosuspend.
>
> I'm not quit sure how to modify this from current usage.
> Do you mean:
> 1. Move pm_runtime_get_sync() from cmdq_mbox_semd_data() to startup()
> and keep pm_runtime_put_autosuspend() in cmdq_mbox_semd_data()?
>
> 2. Call shutdown() in cmdq_runtime_suspend()?
>
No shutdown() is called from channel release. Maybe release the
channel from suspend() of user driver (imgsys).

The idea is that
1) If the gap between transfers on a busy channel is ~10ms or more.  And ..
2) The silence on the mailbox channel is multiple of 100ms. And ...
3) startup() and shutdown() are empty (as in your driver)
  ... then you could afford to release/suspend and resume/request
during the silence period because the _api_ cost of request/release
should not even be 1ms. Your stack may be designed to make doing that
difficult but numbers wise it should work.

Thanks
Jason-JH Lin (林睿祥) June 26, 2024, 9:32 a.m. UTC | #12
[snip]

> > > > "I want numbers" is a sensible request, honestly I would do the
> > > same so I totally
> > > > understand that.
> > > >
> > > > Jason, can you please perform latency measurements on 60Hz and
> > > *especially* ISP/cam
> > > > cases while "continuously" calling startup() and shutdown() for
> > > every power saving
> > > > operation?
> >
> > You mean moving clk_builk_enable() and clk_bulk_disable() to
> startup()
> > and shutdown(), and then calling startup() in cmdq_runtime_resume()
> and
> > calling shutdown() in cmdq_runtime_suspend()?
> > If we do that, I think GCE clocks will never be closed because
> > mbox_request_channel() and mbox_free_channel() will call startup()
> and
> > shutdown() already.
> >
> > I found that display thread:0 will call mbox_send_message() every
> 16ms
> > when I moving UI continuously:
> > [   61.503604] thread_id:0, send_diff:16 ms
> > [   61.520266] thread_id:0, send_diff:16 ms
> > [   61.537143] thread_id:0, send_diff:16 ms
> > [   61.553609] thread_id:0, send_diff:16 ms
> > [   61.570811] thread_id:0, send_diff:17 ms
> > [   61.587075] thread_id:0, send_diff:16 ms
> > [   61.603527] thread_id:0, send_diff:16 ms
> > [   61.620082] thread_id:0, send_diff:16 ms
> > [   61.637496] thread_id:0, send_diff:17 ms
> > [   61.653876] thread_id:0, send_diff:16 ms
> > [   61.670162] thread_id:0, send_diff:16 ms
> > [   61.687318] thread_id:0, send_diff:17 ms
> > [   61.703747] thread_id:0, send_diff:16 ms
> > [   61.720656] thread_id:0, send_diff:16 ms
> > If I didn't do anything to UI, I won't see any mbox_send_message()
> > logs.
> >
> > It means mbox_send_message() will only be called when the display
> is
> > updated:
> > [  782.777205] thread_id:0, send_diff:20072 ms
> > [  820.296738] thread_id:0, send_diff:37519 ms
> > [  863.034270] thread_id:0, send_diff:42737 ms
> > [  880.297759] thread_id:0, send_diff:17263 ms
> > [  917.795432] thread_id:0, send_diff:37497 ms
> >
> >
> > When opening camera APP, it seems to become 30fps:
> > [  120.551778] BUG: sleeping function called from invalid context
> at
> > drivers/base/power/runtime.c:1164
> > [  120.560844] in_atomic(): 1, irqs_disabled(): 128, non_block: 0,
> pid:
> > 201, name: kworker/u17:1
> > [  120.569363] preempt_count: 1, expected: 0
> > [  120.573367] RCU nest depth: 0, expected: 0
> > [  120.577457] INFO: lockdep is turned off.
> > [  120.581372] irq event stamp: 0
> > [  120.584420] hardirqs last  enabled at (0): [<0000000000000000>]
> 0x0
> > [  120.590687] hardirqs last disabled at (0): [<ffffffd3624a86e0>]
> > copy_process+0x578/0xf10
> > [  120.598780] softirqs last  enabled at (0): [<ffffffd3624a86ec>]
> > copy_process+0x584/0xf10
> > [  120.606865] softirqs last disabled at (0): [<0000000000000000>]
> 0x0
> > [  120.613128] CPU: 0 PID: 201 Comm: kworker/u17:1 Not tainted
> 6.1.92-
> > lockdep-14419-g8c5f20e56598-dirty #1
> > e971112e83e796239a41e46e8747794c737fb18c
> > [  120.626073] Hardware name: Google Ciri sku0/unprovisioned board
> (DT)
> > [  120.632420] Workqueue: imgsys_runner imgsys_runner_func
> > [  120.637647] Call trace:
> > [  120.640086]  dump_backtrace+0x100/0x120
> > [  120.643919]  show_stack+0x20/0x2c
> > [  120.647230]  dump_stack_lvl+0x84/0xb4
> > [  120.650891]  dump_stack+0x18/0x44
> > [  120.654203]  __might_resched+0x204/0x214
> > [  120.658123]  __might_sleep+0x50/0x80
> > [  120.661692]  __pm_runtime_resume+0x48/0xbc
> > [  120.665786]  cmdq_mbox_send_data+0x8c/0x434
> > [  120.669966]  msg_submit+0x84/0x104
> > [  120.673362]  mbox_send_message+0xb8/0x124
> > [  120.677365]  imgsys_cmdq_sendtask+0x688/0x928
> > [  120.681717]  imgsys_runner_func+0x44/0x78
> > [  120.685722]  process_one_work+0x274/0x574
> > [  120.689731]  worker_thread+0x240/0x3f0
> > [  120.693475]  kthread+0xfc/0x11c
> > [  120.696612]  ret_from_fork+0x10/0x20
> > [  120.710570] mtk-cam raw 16030000.cam-raw-a: [SOF] frame done
> reading
> > lost frames. req(7),ts(120697780083)
> > [  120.720352] mtk-cam raw 16030000.cam-raw-a: [SOF] SCQ_DELAY
> state:4
> > ts:120697780083
> > [  120.990091] thread_id:0, send_diff:1470 ms
> > [  121.007210] thread_id:0, send_diff:17 ms
> > [  121.016484] thread_id:0, send_diff:9 ms
> > [  121.032652] thread_id:0, send_diff:16 ms
> > [  121.049366] thread_id:0, send_diff:16 ms
> > [  121.066880] thread_id:0, send_diff:17 ms
> > [  121.087023] thread_id:0, send_diff:20 ms
> > [  121.099325] thread_id:0, send_diff:12 ms
> > [  121.135286] thread_id:0, send_diff:35 ms
> > [  121.149349] thread_id:0, send_diff:14 ms
> > [  121.179259] thread_id:0, send_diff:29 ms
> > [  121.203183] thread_id:0, send_diff:23 ms
> > [  121.242234] thread_id:0, send_diff:39 ms
> > [  121.280645] thread_id:0, send_diff:38 ms
> > [  121.318651] thread_id:0, send_diff:38 ms
> > [  121.356366] thread_id:0, send_diff:37 ms
> > [  121.394484] thread_id:0, send_diff:38 ms
> >
> > The cmdq_runtime_suspend() will only be called when I disabled
> camera
> > APP:
> > [  468.137530] pm_on ts:465259, pm_off ts:4681375, diff:2877 ms
> > and the diff is the duration from opening the camera APP to closing
> the
> > camera APP.
> >
> > From booting to seeing the UI and any operation of UI, I didn't see
> > any cmdq_runtime_suspend() logs from display mbox channel.
> > So cmdq_runtime_resume() and cmdq_runtime_suspend() are never
> called by
> > display mbox channel.
> > This can tell why only opening the camera APP can see this BUG
> report.
> >
> > > >
> > > To be clear, do with the mailbox channel that you do with the
> clocks
> > > now, because your startup() is literally and shutdown() is
> > > practically
> > > empty.
> > > Call shutdown() when no request has come in for a while, so you
> know
> > > the client has quiesced likely. So try putting request/release in
> > > higher level RPM with autosuspend.
> >
> > I'm not quit sure how to modify this from current usage.
> > Do you mean:
> > 1. Move pm_runtime_get_sync() from cmdq_mbox_semd_data() to
> startup()
> > and keep pm_runtime_put_autosuspend() in cmdq_mbox_semd_data()?
> >
> > 2. Call shutdown() in cmdq_runtime_suspend()?
> >
> No shutdown() is called from channel release. Maybe release the
> channel from suspend() of user driver (imgsys).

When users call mbox_channel_request(), there is nothing to do with HW,
so CMDQ driver leaves startup() empty.
When users call mbox_sned_message(), users want to use HW to execute
some instruction, so CMDQ driver needs to enable the power/clocks in
this function.
When users call mbox_free_channel() or mbox_flush(), CMDQ driver needs
to check the HW executing status and clears all the executing tasks, so
it also needs to enable power/clocks in shutdown() and flush() to
control the HW.

The power saving goal is to disable the power/clk more than the
configurable timeout (100ms currently) after enabling it.
So I think calling suspend() of user driver can not achieve this.
Please correct me if I'm wrong :)

> 
> The idea is that
> 1) If the gap between transfers on a busy channel is ~10ms or
> more.  And ..
> 2) The silence on the mailbox channel is multiple of 100ms. And ...

The mailbox channel would not be always busy and the gap is not
consistent as well.
For example, display driver calls mbox_send_message() every ~16ms while
UI is updating in 60fps.
If UI dose not update, display driver won't call mbox_send_message(),
so user may not has the gap and the silence like this.

> 3) startup() and shutdown() are empty (as in your driver)
>   ... then you could afford to release/suspend and resume/request
> during the silence period because the _api_ cost of request/release
> should not even be 1ms. Your stack may be designed to make doing that
> difficult but numbers wise it should work.

Yes, that's true, but I think we can not achieve the goal during the
suspend/resume stage.

Currently, we use the pm_runtime_get_sync() and
pm_runtime_auto_suspend() to achieve this.
So I think adding the PM function hook in mailbox driver is much better
for users driver and also more flexible for the controller driver.

Please correct me if there is something I misunderstood :)

Regards,
Jason-JH.Lin

> 
> Thanks
diff mbox series

Patch

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 4aa394e91109..02cef3eee35a 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -387,20 +387,13 @@  static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
 	struct cmdq *cmdq = dev_get_drvdata(chan->mbox->dev);
 	struct cmdq_task *task;
 	unsigned long curr_pa, end_pa;
-	int ret;
 
 	/* Client should not flush new tasks if suspended. */
 	WARN_ON(cmdq->suspended);
 
-	ret = pm_runtime_get_sync(cmdq->mbox.dev);
-	if (ret < 0)
-		return ret;
-
 	task = kzalloc(sizeof(*task), GFP_ATOMIC);
-	if (!task) {
-		pm_runtime_put_autosuspend(cmdq->mbox.dev);
+	if (!task)
 		return -ENOMEM;
-	}
 
 	task->cmdq = cmdq;
 	INIT_LIST_HEAD(&task->list_entry);
@@ -448,7 +441,6 @@  static int cmdq_mbox_send_data(struct mbox_chan *chan, void *data)
 	list_move_tail(&task->list_entry, &thread->task_busy_list);
 
 	pm_runtime_mark_last_busy(cmdq->mbox.dev);
-	pm_runtime_put_autosuspend(cmdq->mbox.dev);
 
 	return 0;
 }
@@ -465,8 +457,6 @@  static void cmdq_mbox_shutdown(struct mbox_chan *chan)
 	struct cmdq_task *task, *tmp;
 	unsigned long flags;
 
-	WARN_ON(pm_runtime_get_sync(cmdq->mbox.dev) < 0);
-
 	spin_lock_irqsave(&thread->chan->lock, flags);
 	if (list_empty(&thread->task_busy_list))
 		goto done;
@@ -496,7 +486,6 @@  static void cmdq_mbox_shutdown(struct mbox_chan *chan)
 	spin_unlock_irqrestore(&thread->chan->lock, flags);
 
 	pm_runtime_mark_last_busy(cmdq->mbox.dev);
-	pm_runtime_put_autosuspend(cmdq->mbox.dev);
 }
 
 static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
@@ -507,11 +496,6 @@  static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 	struct cmdq_task *task, *tmp;
 	unsigned long flags;
 	u32 enable;
-	int ret;
-
-	ret = pm_runtime_get_sync(cmdq->mbox.dev);
-	if (ret < 0)
-		return ret;
 
 	spin_lock_irqsave(&thread->chan->lock, flags);
 	if (list_empty(&thread->task_busy_list))
@@ -536,7 +520,6 @@  static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 out:
 	spin_unlock_irqrestore(&thread->chan->lock, flags);
 	pm_runtime_mark_last_busy(cmdq->mbox.dev);
-	pm_runtime_put_autosuspend(cmdq->mbox.dev);
 
 	return 0;
 
@@ -551,15 +534,31 @@  static int cmdq_mbox_flush(struct mbox_chan *chan, unsigned long timeout)
 		return -EFAULT;
 	}
 	pm_runtime_mark_last_busy(cmdq->mbox.dev);
-	pm_runtime_put_autosuspend(cmdq->mbox.dev);
+
 	return 0;
 }
 
+static int cmdq_mbox_pm_resume(struct mbox_chan *chan)
+{
+	struct cmdq *cmdq = dev_get_drvdata(chan->mbox->dev);
+
+	return pm_runtime_get_sync(cmdq->mbox.dev);
+}
+
+static void cmdq_mbox_pm_susepnd(struct mbox_chan *chan)
+{
+	struct cmdq *cmdq = dev_get_drvdata(chan->mbox->dev);
+
+	pm_runtime_put_autosuspend(cmdq->mbox.dev);
+}
+
 static const struct mbox_chan_ops cmdq_mbox_chan_ops = {
 	.send_data = cmdq_mbox_send_data,
 	.startup = cmdq_mbox_startup,
 	.shutdown = cmdq_mbox_shutdown,
 	.flush = cmdq_mbox_flush,
+	.power_get = cmdq_mbox_pm_resume,
+	.power_put = cmdq_mbox_pm_susepnd,
 };
 
 static struct mbox_chan *cmdq_xlate(struct mbox_controller *mbox,