diff mbox series

[v1,1/1] driver: watchdog: Remove mtk_wdt_stop() in probe() to prevent the system freeze and it doesn't reboot by watchdog problem

Message ID 1608881450-11081-2-git-send-email-freddy.hsin@mediatek.com (mailing list archive)
State Changes Requested
Headers show
Series [v1,1/1] driver: watchdog: Remove mtk_wdt_stop() in probe() to prevent the system freeze and it doesn't reboot by watchdog problem | expand

Commit Message

Freddy.Hsin Dec. 25, 2020, 7:30 a.m. UTC
From: "freddy.hsin" <freddy.hsin@mediatek.com>

Before the user space daemon starts to access the watchdog device,
there is a time interval during which the watchdog is disabled in
the original flow. If the system freezes during this interval, it
cannot be rebooted automatically by the watchdog hardware.

To solve this problem, keep the watchdog hardware running and let
the watchdog framework start its hrtimer to ping it. This is done
by setting max_hw_heartbeat_ms and WDOG_HW_RUNNING, which
watchdog_need_worker() uses to determine whether the worker should
be started.

Change-Id: I6a041b0922888a90011d7538ee804d80bc8d15ea
Signed-off-by: freddy.hsin <freddy.hsin@mediatek.com>
---
 drivers/watchdog/mtk_wdt.c |   22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

Comments

Guenter Roeck Dec. 25, 2020, 2:27 p.m. UTC | #1
On 12/24/20 11:30 PM, Freddy Hsin wrote:
> From: "freddy.hsin" <freddy.hsin@mediatek.com>
> 
> Before user space daemon start to access the watchdog device,
> there is a time interval that watchdog is disabled in the
> original flow. If the system freezing at this interval, it
> cannot be rebooted by watchdog hardware automatically.
> 
> In order to solve this problem, the watchdog hardware should be
> kept working, and start hrtimer in framework to ping it by
> setting max_hw_heartbeat_ms and HW_RUNNING used in
> watchdog_need_worker to determine whether the worker should be
> started or not
> 
> Change-Id: I6a041b0922888a90011d7538ee804d80bc8d15ea
> Signed-off-by: freddy.hsin <freddy.hsin@mediatek.com>
> ---
>  drivers/watchdog/mtk_wdt.c |   22 ++++++++++++++++++----
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
> index d6a6393..62f08cd 100644
> --- a/drivers/watchdog/mtk_wdt.c
> +++ b/drivers/watchdog/mtk_wdt.c
> @@ -195,6 +195,19 @@ static int mtk_wdt_set_timeout(struct watchdog_device *wdt_dev,
>  	return 0;
>  }
>  
> +static void mtk_wdt_init(struct watchdog_device *wdt_dev)
> +{
> +	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
> +	void __iomem *wdt_base;
> +
> +	wdt_base = mtk_wdt->wdt_base;
> +
> +	if (readl(wdt_base + WDT_MODE) & WDT_MODE_EN) {
> +		set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
> +		mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
> +	}
> +}
> +
>  static int mtk_wdt_stop(struct watchdog_device *wdt_dev)
>  {
>  	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
> @@ -266,16 +279,17 @@ static int mtk_wdt_probe(struct platform_device *pdev)
>  	mtk_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT;
>  	mtk_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT;

No longer needed if max_hw_heartbeat_ms is set.

>  	mtk_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT;
> +	mtk_wdt->wdt_dev.max_hw_heartbeat_ms = (WDT_MAX_TIMEOUT - 1) * 1000;

This needs explanation. Why WDT_MAX_TIMEOUT - 1 ?

>  	mtk_wdt->wdt_dev.parent = dev;
>  
> +	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
> +
> +	mtk_wdt_init(&mtk_wdt->wdt_dev);
> +

Setting the chip (with the maximum timeout) ...

>  	watchdog_init_timeout(&mtk_wdt->wdt_dev, timeout, dev);

just before initializing/setting the real timeout doesn't really make sense.

>  	watchdog_set_nowayout(&mtk_wdt->wdt_dev, nowayout);
>  	watchdog_set_restart_priority(&mtk_wdt->wdt_dev, 128);
>  
> -	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
> -
> -	mtk_wdt_stop(&mtk_wdt->wdt_dev);
> -
>  	watchdog_stop_on_reboot(&mtk_wdt->wdt_dev);
>  	err = devm_watchdog_register_device(dev, &mtk_wdt->wdt_dev);
>  	if (unlikely(err))
>
Guenter Roeck Dec. 25, 2020, 2:29 p.m. UTC | #2
On 12/24/20 11:30 PM, Freddy Hsin wrote:
> From: "freddy.hsin" <freddy.hsin@mediatek.com>
> 
> Before user space daemon start to access the watchdog device,
> there is a time interval that watchdog is disabled in the
> original flow. If the system freezing at this interval, it
> cannot be rebooted by watchdog hardware automatically.
> 
> In order to solve this problem, the watchdog hardware should be
> kept working, and start hrtimer in framework to ping it by
> setting max_hw_heartbeat_ms and HW_RUNNING used in
> watchdog_need_worker to determine whether the worker should be
> started or not
> 
> Change-Id: I6a041b0922888a90011d7538ee804d80bc8d15ea
> Signed-off-by: freddy.hsin <freddy.hsin@mediatek.com>

Also: $subject should refer to mtk_wdt.

watchdog: mtk_wdt: Do not stop watchdog in probe function

or similar.

Thanks,
Guenter

> ---
>  drivers/watchdog/mtk_wdt.c |   22 ++++++++++++++++++----
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
> index d6a6393..62f08cd 100644
> --- a/drivers/watchdog/mtk_wdt.c
> +++ b/drivers/watchdog/mtk_wdt.c
> @@ -195,6 +195,19 @@ static int mtk_wdt_set_timeout(struct watchdog_device *wdt_dev,
>  	return 0;
>  }
>  
> +static void mtk_wdt_init(struct watchdog_device *wdt_dev)
> +{
> +	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
> +	void __iomem *wdt_base;
> +
> +	wdt_base = mtk_wdt->wdt_base;
> +
> +	if (readl(wdt_base + WDT_MODE) & WDT_MODE_EN) {
> +		set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
> +		mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
> +	}
> +}
> +
>  static int mtk_wdt_stop(struct watchdog_device *wdt_dev)
>  {
>  	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
> @@ -266,16 +279,17 @@ static int mtk_wdt_probe(struct platform_device *pdev)
>  	mtk_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT;
>  	mtk_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT;
>  	mtk_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT;
> +	mtk_wdt->wdt_dev.max_hw_heartbeat_ms = (WDT_MAX_TIMEOUT - 1) * 1000;
>  	mtk_wdt->wdt_dev.parent = dev;
>  
> +	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
> +
> +	mtk_wdt_init(&mtk_wdt->wdt_dev);
> +
>  	watchdog_init_timeout(&mtk_wdt->wdt_dev, timeout, dev);
>  	watchdog_set_nowayout(&mtk_wdt->wdt_dev, nowayout);
>  	watchdog_set_restart_priority(&mtk_wdt->wdt_dev, 128);
>  
> -	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
> -
> -	mtk_wdt_stop(&mtk_wdt->wdt_dev);
> -
>  	watchdog_stop_on_reboot(&mtk_wdt->wdt_dev);
>  	err = devm_watchdog_register_device(dev, &mtk_wdt->wdt_dev);
>  	if (unlikely(err))
>
Guenter Roeck Dec. 29, 2020, 3:08 p.m. UTC | #3
On 12/29/20 12:39 AM, Freddy.Hsin wrote:
> On Fri, 2020-12-25 at 22:27 +0800, Guenter Roeck wrote:
>> On 12/24/20 11:30 PM, Freddy Hsin wrote:
>>> From: "freddy.hsin" <freddy.hsin@mediatek.com>
>>>
>>> Before user space daemon start to access the watchdog device,
>>> there is a time interval that watchdog is disabled in the
>>> original flow. If the system freezing at this interval, it
>>> cannot be rebooted by watchdog hardware automatically.
>>>
>>> In order to solve this problem, the watchdog hardware should be
>>> kept working, and start hrtimer in framework to ping it by
>>> setting max_hw_heartbeat_ms and HW_RUNNING used in
>>> watchdog_need_worker to determine whether the worker should be
>>> started or not
>>>
>>> Change-Id: I6a041b0922888a90011d7538ee804d80bc8d15ea
>>> Signed-off-by: freddy.hsin <freddy.hsin@mediatek.com>
>>> ---
>>>  drivers/watchdog/mtk_wdt.c |   22 ++++++++++++++++++----
>>>  1 file changed, 18 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
>>> index d6a6393..62f08cd 100644
>>> --- a/drivers/watchdog/mtk_wdt.c
>>> +++ b/drivers/watchdog/mtk_wdt.c
>>> @@ -195,6 +195,19 @@ static int mtk_wdt_set_timeout(struct watchdog_device *wdt_dev,
>>>  	return 0;
>>>  }
>>>  
>>> +static void mtk_wdt_init(struct watchdog_device *wdt_dev)
>>> +{
>>> +	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
>>> +	void __iomem *wdt_base;
>>> +
>>> +	wdt_base = mtk_wdt->wdt_base;
>>> +
>>> +	if (readl(wdt_base + WDT_MODE) & WDT_MODE_EN) {
>>> +		set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
>>> +		mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
>>> +	}
>>> +}
>>> +
>>>  static int mtk_wdt_stop(struct watchdog_device *wdt_dev)
>>>  {
>>>  	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
>>> @@ -266,16 +279,17 @@ static int mtk_wdt_probe(struct platform_device *pdev)
>>>  	mtk_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT;
>>>  	mtk_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT;
>>
>> No longer needed if max_hw_heartbeat_ms is set.
> 
> Got it. it will be removed in V2 patch
> 
>>
>>>  	mtk_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT;
>>> +	mtk_wdt->wdt_dev.max_hw_heartbeat_ms = (WDT_MAX_TIMEOUT - 1) * 1000;
>>
>> This needs explanation. Why WDT_MAX_TIMEOUT - 1 ?
> 
> In watchdog_need_worker(), the condition for starting the worker is
> (hm && watchdog_active(wdd) && t > hm). So I think that when
> watchdog_active() is true, t (timeout) must be greater than
> hm (max_hw_heartbeat_ms), or the worker will not be started. Shouldn't
> I take this case into account?
> 

You are claiming that the watchdog core is buggy. Make your case there, not here.

Guenter

>>
>>>  	mtk_wdt->wdt_dev.parent = dev;
>>>  
>>> +	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
>>> +
>>> +	mtk_wdt_init(&mtk_wdt->wdt_dev);
>>> +
>>
>> Setting the chip (with the maximum timeout) ...
>>
> Got it. mtk_wdt_init will be moved after watchdog_init_timeout in V2
> 
> BRs,
> FreddyHsin
> 
>>>  	watchdog_init_timeout(&mtk_wdt->wdt_dev, timeout, dev);
>>
>> just before initializing/setting the real timeout doesn't really make sense.
>>
>>>  	watchdog_set_nowayout(&mtk_wdt->wdt_dev, nowayout);
>>>  	watchdog_set_restart_priority(&mtk_wdt->wdt_dev, 128);
>>>  
>>> -	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
>>> -
>>> -	mtk_wdt_stop(&mtk_wdt->wdt_dev);
>>> -
>>>  	watchdog_stop_on_reboot(&mtk_wdt->wdt_dev);
>>>  	err = devm_watchdog_register_device(dev, &mtk_wdt->wdt_dev);
>>>  	if (unlikely(err))
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index d6a6393..62f08cd 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -195,6 +195,19 @@  static int mtk_wdt_set_timeout(struct watchdog_device *wdt_dev,
 	return 0;
 }
 
+static void mtk_wdt_init(struct watchdog_device *wdt_dev)
+{
+	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
+	void __iomem *wdt_base;
+
+	wdt_base = mtk_wdt->wdt_base;
+
+	if (readl(wdt_base + WDT_MODE) & WDT_MODE_EN) {
+		set_bit(WDOG_HW_RUNNING, &wdt_dev->status);
+		mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
+	}
+}
+
 static int mtk_wdt_stop(struct watchdog_device *wdt_dev)
 {
 	struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
@@ -266,16 +279,17 @@  static int mtk_wdt_probe(struct platform_device *pdev)
 	mtk_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT;
 	mtk_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT;
 	mtk_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT;
+	mtk_wdt->wdt_dev.max_hw_heartbeat_ms = (WDT_MAX_TIMEOUT - 1) * 1000;
 	mtk_wdt->wdt_dev.parent = dev;
 
+	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
+
+	mtk_wdt_init(&mtk_wdt->wdt_dev);
+
 	watchdog_init_timeout(&mtk_wdt->wdt_dev, timeout, dev);
 	watchdog_set_nowayout(&mtk_wdt->wdt_dev, nowayout);
 	watchdog_set_restart_priority(&mtk_wdt->wdt_dev, 128);
 
-	watchdog_set_drvdata(&mtk_wdt->wdt_dev, mtk_wdt);
-
-	mtk_wdt_stop(&mtk_wdt->wdt_dev);
-
 	watchdog_stop_on_reboot(&mtk_wdt->wdt_dev);
 	err = devm_watchdog_register_device(dev, &mtk_wdt->wdt_dev);
 	if (unlikely(err))