diff mbox series

[RFC] Bluetooth: btnxpuart: Fix nxp_setup in case chip is powered on late

Message ID 20240117030501.149114-1-neeraj.sanjaykale@nxp.com (mailing list archive)
State New, archived
Headers show
Series [RFC] Bluetooth: btnxpuart: Fix nxp_setup in case chip is powered on late | expand

Checks

Context Check Description
tedd_an/pre-ci_am success Success
tedd_an/CheckPatch warning WARNING: Prefer a maximum 75 chars per line (possible unwrapped commit description?) #163: https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/ total: 0 errors, 1 warnings, 57 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. /github/workspace/src/src/13521411.patch has style problems, please review. NOTE: Ignored message types: UNKNOWN_COMMIT_ID NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS.
tedd_an/GitLint fail WARNING: I3 - ignore-body-lines: gitlint will be switching from using Python regex 'match' (match beginning) to 'search' (match anywhere) semantics. Please review your ignore-body-lines.regex option accordingly. To remove this warning, set general.regex-style-search=True. More details: https://jorisroovers.github.io/gitlint/configuration/#regex-style-search 37: B1 Line exceeds max length (96>80): "https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/" 41: B1 Line exceeds max length (104>80): "Closes: https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/"
tedd_an/SubjectPrefix success Gitlint PASS
tedd_an/BuildKernel success BuildKernel PASS
tedd_an/CheckAllWarning success CheckAllWarning PASS
tedd_an/CheckSparse success CheckSparse PASS
tedd_an/CheckSmatch success CheckSparse PASS
tedd_an/BuildKernel32 success BuildKernel32 PASS
tedd_an/TestRunnerSetup success TestRunnerSetup PASS
tedd_an/TestRunner_l2cap-tester success TestRunner PASS
tedd_an/TestRunner_iso-tester success TestRunner PASS
tedd_an/TestRunner_bnep-tester success TestRunner PASS
tedd_an/TestRunner_mgmt-tester success TestRunner PASS
tedd_an/TestRunner_rfcomm-tester success TestRunner PASS
tedd_an/TestRunner_sco-tester success TestRunner PASS
tedd_an/TestRunner_ioctl-tester success TestRunner PASS
tedd_an/TestRunner_mesh-tester success TestRunner PASS
tedd_an/TestRunner_smp-tester success TestRunner PASS
tedd_an/TestRunner_userchan-tester success TestRunner PASS
tedd_an/IncrementalBuild success Incremental Build PASS

Commit Message

Neeraj Sanjay Kale Jan. 17, 2024, 3:05 a.m. UTC
This adds a setup retry mechanism in case the chip is powered on after the
btnxpuart driver is loaded.

The NXP chipsets have a common PDn pin shared between Wi-Fi and Bluetooth.

When customers use mwifiex_sdio drivers for Wi-Fi, the pwrseq tied to the
driver toggles the GPIO connected to the chip's PDn pin, powering it on.

The btnxpuart driver is loaded before mwifiex, and the setup function does
not receive any bootloader signature, as PDn is held low at this moment.
The driver inadvertently assumes that FW is already running on the chip.

The nxp_setup exits with a success, and BT subsystem proceeds with sending
initialization commands, which result in a timeout.
[  284.588177] Bluetooth: hci0: Opcode 0x0c03 failed: -110
[  286.636167] Bluetooth: hci0: Setting wake-up method failed (-110)

Later when mwifiex is loaded, the pwrseq makes PDn pin high, and downloads
either WiFi or combo FW.

However, the btnxpuart is in a bad state, and re-loading btnxpuart module
does not help.

This fix adds a check for CTS pin, in case no bootloader signatures are
received. If CTS is high, it means that the chip is currently powered off,
and nxp_setup will return an error, preventing any HCI initialization
commands to be sent by the BT subsystem.

The driver attempts to check for bootloader signatures and CTS again, by
scheduling the hci power_on work after every 5 seconds, as long as the
btnxpuart module is inserted in the kernel.

This fix attempts to improvise the fix provided my Marcel Ziswiler and
handle this scenario gracefully.
https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/

Signed-off-by: Neeraj Sanjay Kale <neeraj.sanjaykale@nxp.com>
Reported-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Closes: https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/
---
 drivers/bluetooth/btnxpuart.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

Comments

bluez.test.bot@gmail.com Jan. 17, 2024, 5:33 a.m. UTC | #1
This is automated email and please do not reply to this email!

Dear submitter,

Thank you for submitting the patches to the linux bluetooth mailing list.
This is a CI test results with your patch series:
PW Link:https://patchwork.kernel.org/project/bluetooth/list/?series=817400

---Test result---

Test Summary:
CheckPatch                    FAIL      0.94 seconds
GitLint                       FAIL      0.58 seconds
SubjectPrefix                 PASS      0.13 seconds
BuildKernel                   PASS      27.66 seconds
CheckAllWarning               PASS      30.76 seconds
CheckSparse                   PASS      36.11 seconds
CheckSmatch                   PASS      100.29 seconds
BuildKernel32                 PASS      29.32 seconds
TestRunnerSetup               PASS      443.73 seconds
TestRunner_l2cap-tester       PASS      23.51 seconds
TestRunner_iso-tester         PASS      44.05 seconds
TestRunner_bnep-tester        PASS      7.04 seconds
TestRunner_mgmt-tester        PASS      161.26 seconds
TestRunner_rfcomm-tester      PASS      11.00 seconds
TestRunner_sco-tester         PASS      14.66 seconds
TestRunner_ioctl-tester       PASS      12.12 seconds
TestRunner_mesh-tester        PASS      8.97 seconds
TestRunner_smp-tester         PASS      9.81 seconds
TestRunner_userchan-tester    PASS      7.37 seconds
IncrementalBuild              PASS      26.77 seconds

Details
##############################
Test: CheckPatch - FAIL
Desc: Run checkpatch.pl script
Output:
[RFC] Bluetooth: btnxpuart: Fix nxp_setup in case chip is powered on late
WARNING: Prefer a maximum 75 chars per line (possible unwrapped commit description?)
#163: 
https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/

total: 0 errors, 1 warnings, 57 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
      mechanically convert to the typical style using --fix or --fix-inplace.

/github/workspace/src/src/13521411.patch has style problems, please review.

NOTE: Ignored message types: UNKNOWN_COMMIT_ID

NOTE: If any of the errors are false positives, please report
      them to the maintainer, see CHECKPATCH in MAINTAINERS.


##############################
Test: GitLint - FAIL
Desc: Run gitlint
Output:
[RFC] Bluetooth: btnxpuart: Fix nxp_setup in case chip is powered on late

WARNING: I3 - ignore-body-lines: gitlint will be switching from using Python regex 'match' (match beginning) to 'search' (match anywhere) semantics. Please review your ignore-body-lines.regex option accordingly. To remove this warning, set general.regex-style-search=True. More details: https://jorisroovers.github.io/gitlint/configuration/#regex-style-search
37: B1 Line exceeds max length (96>80): "https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/"
41: B1 Line exceeds max length (104>80): "Closes: https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/"


---
Regards,
Linux Bluetooth
Francesco Dolcini Jan. 17, 2024, 9:09 a.m. UTC | #2
On Wed, Jan 17, 2024 at 08:35:01AM +0530, Neeraj Sanjay Kale wrote:
> This adds a setup retry mechanism in case the chip is powered on after the
> btnxpuart driver is loaded.
> 
> The NXP chipsets have a common PDn pin shared between Wi-Fi and Bluetooth.
> 
> When customers use mwifiex_sdio drivers for Wi-Fi, the pwrseq tied to the
> driver toggles the GPIO connected to the chip's PDn pin, powering it on.
> 
> The btnxpuart driver is loaded before mwifiex, and the setup function does
> not receive any bootloader signature, as PDn is held low at this moment.
> The driver inadvertently assumes that FW is already running on the chip.
> 
> The nxp_setup exits with a success, and BT subsystem proceeds with sending
> initialization commands, which result in a timeout.
> [  284.588177] Bluetooth: hci0: Opcode 0x0c03 failed: -110
> [  286.636167] Bluetooth: hci0: Setting wake-up method failed (-110)
> 
> Later when mwifiex is loaded, the pwrseq makes PDn pin high, and downloads
> either WiFi or combo FW.
> 
> However, the btnxpuart is in a bad state, and re-loading btnxpuart module
> does not help.
> 
> This fix adds a check for CTS pin, in case no bootloader signatures are
> received. If CTS is high, it means that the chip is currently powered off,
> and nxp_setup will return an error, preventing any HCI initialization
> commands to be sent by the BT subsystem.
> 
> The driver attempts to check for bootloader signatures and CTS again, by
> scheduling the hci power_on work after every 5 seconds, as long as the
> btnxpuart module is inserted in the kernel.
> 
> This fix attempts to improvise the fix provided my Marcel Ziswiler and
> handle this scenario gracefully.
> https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/
> 
> Signed-off-by: Neeraj Sanjay Kale <neeraj.sanjaykale@nxp.com>
> Reported-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
> Closes: https://patchwork.kernel.org/project/bluetooth/patch/20231018145540.34014-3-marcel@ziswiler.com/
> ---
>  drivers/bluetooth/btnxpuart.c | 21 +++++++++++++++++++++
>  1 file changed, 21 insertions(+)
> 
> diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
> index 7f88b6f52f26..20a3a5bd5529 100644
> --- a/drivers/bluetooth/btnxpuart.c
> +++ b/drivers/bluetooth/btnxpuart.c
> @@ -1036,6 +1048,13 @@ static int nxp_setup(struct hci_dev *hdev)
>  		err = nxp_download_firmware(hdev);
>  		if (err < 0)
>  			return err;
> +	} else if (!serdev_device_get_cts(nxpdev->serdev)) {
> +		/* CTS is high and no bootloader signatures detected */
> +		bt_dev_dbg(hdev, "Controller not detected. Will check again in %d msec",
> +			   NXP_SETUP_RETRY_TIME_MS);
> +		schedule_delayed_work(&nxpdev->setup_retry_work,
> +				      msecs_to_jiffies(NXP_SETUP_RETRY_TIME_MS));
> +		return -ENODEV;
why not just

return -EPROBE_DEFER;

and remove everything else, no need for any kind of retry or delayed work
if the driver core takes care of it.


Francesco
Neeraj Sanjay Kale Jan. 17, 2024, 9:38 a.m. UTC | #3
Hi Francesco,

Thankyou for the review comment.

> > diff --git a/drivers/bluetooth/btnxpuart.c
> > b/drivers/bluetooth/btnxpuart.c index 7f88b6f52f26..20a3a5bd5529
> > 100644
> > --- a/drivers/bluetooth/btnxpuart.c
> > +++ b/drivers/bluetooth/btnxpuart.c
> > @@ -1036,6 +1048,13 @@ static int nxp_setup(struct hci_dev *hdev)
> >               err = nxp_download_firmware(hdev);
> >               if (err < 0)
> >                       return err;
> > +     } else if (!serdev_device_get_cts(nxpdev->serdev)) {
> > +             /* CTS is high and no bootloader signatures detected */
> > +             bt_dev_dbg(hdev, "Controller not detected. Will check again in %d
> msec",
> > +                        NXP_SETUP_RETRY_TIME_MS);
> > +             schedule_delayed_work(&nxpdev->setup_retry_work,
> > +                                   msecs_to_jiffies(NXP_SETUP_RETRY_TIME_MS));
> > +             return -ENODEV;
> why not just
> 
> return -EPROBE_DEFER;
> 
> and remove everything else, no need for any kind of retry or delayed work if
> the driver core takes care of it.
> 
Wouldn't returning -EPROBE_DEFER make more sense in driver probe context?

Here, the driver probe registers an hci interface (hci_register_dev()), and returns success to kernel.

The hci_register_dev() queues hdev->power_on work at the end, which opens the hci dev, and ultimately calls this setup function.

In this patch, we are queueing the same work from the delayed setup_retry_work().

Returning -ENODEV (or -EPROBE_DEFER) would only affect hci_dev_open(), which is in power_on work context, and not driver probe context.

Perhaps, we should call it hci_retry_power_on() work or something similar?

Please let me know your thoughts on this.

Thanks,
Neeraj
Francesco Dolcini Jan. 17, 2024, 10:49 a.m. UTC | #4
On Wed, Jan 17, 2024 at 09:38:43AM +0000, Neeraj Sanjay Kale wrote:
> > > diff --git a/drivers/bluetooth/btnxpuart.c
> > > b/drivers/bluetooth/btnxpuart.c index 7f88b6f52f26..20a3a5bd5529
> > > 100644
> > > --- a/drivers/bluetooth/btnxpuart.c
> > > +++ b/drivers/bluetooth/btnxpuart.c
> > > @@ -1036,6 +1048,13 @@ static int nxp_setup(struct hci_dev *hdev)
> > >               err = nxp_download_firmware(hdev);
> > >               if (err < 0)
> > >                       return err;
> > > +     } else if (!serdev_device_get_cts(nxpdev->serdev)) {
> > > +             /* CTS is high and no bootloader signatures detected */
> > > +             bt_dev_dbg(hdev, "Controller not detected. Will check again in %d
> > msec",
> > > +                        NXP_SETUP_RETRY_TIME_MS);
> > > +             schedule_delayed_work(&nxpdev->setup_retry_work,
> > > +                                   msecs_to_jiffies(NXP_SETUP_RETRY_TIME_MS));
> > > +             return -ENODEV;
> > why not just
> > 
> > return -EPROBE_DEFER;
> > 
> > and remove everything else, no need for any kind of retry or delayed work if
> > the driver core takes care of it.
> > 
> Wouldn't returning -EPROBE_DEFER make more sense in driver probe context?

Yes, you are right. I was rushing to this suggestion without thinking at this
properly.

> Here, the driver probe registers an hci interface
> (hci_register_dev()), and returns success to kernel.
> 
> The hci_register_dev() queues hdev->power_on work at the end, which
> opens the hci dev, and ultimately calls this setup function.
> 
> In this patch, we are queueing the same work from the delayed
> setup_retry_work().
> 
> Returning -ENODEV (or -EPROBE_DEFER) would only affect hci_dev_open(),
> which is in power_on work context, and not driver probe context.
> 
> Perhaps, we should call it hci_retry_power_on() work or something
> similar?
> 
> Please let me know your thoughts on this.

Do you see any way to get rid of this complexity? Maybe having this
check done during probe, deferring there till we know the device is in a
suitable state (e.g. either you received the bootloader signature, you
know the device is powered or that the firmware is loaded and ready?).

In other words returning EPROBE_DEFER before calling hci_register_dev()?


With that said I still see an issue if the firmware is loaded by the
wifi driver and the BT driver start sending commands before the firware
load procedure is concluded and the firmware is ready. Not sure if you
have a way to wait for this "firmware ready" state.

Francesco
Neeraj Sanjay Kale Jan. 17, 2024, 11:11 a.m. UTC | #5
Hi Francesco,

> > > > diff --git a/drivers/bluetooth/btnxpuart.c
> > > > b/drivers/bluetooth/btnxpuart.c index 7f88b6f52f26..20a3a5bd5529
> > > > 100644
> > > > --- a/drivers/bluetooth/btnxpuart.c
> > > > +++ b/drivers/bluetooth/btnxpuart.c
> > > > @@ -1036,6 +1048,13 @@ static int nxp_setup(struct hci_dev *hdev)
> > > >               err = nxp_download_firmware(hdev);
> > > >               if (err < 0)
> > > >                       return err;
> > > > +     } else if (!serdev_device_get_cts(nxpdev->serdev)) {
> > > > +             /* CTS is high and no bootloader signatures detected */
> > > > +             bt_dev_dbg(hdev, "Controller not detected. Will
> > > > + check again in %d
> > > msec",
> > > > +                        NXP_SETUP_RETRY_TIME_MS);
> > > > +             schedule_delayed_work(&nxpdev->setup_retry_work,
> > > > +                                   msecs_to_jiffies(NXP_SETUP_RETRY_TIME_MS));
> > > > +             return -ENODEV;
> > > why not just
> > >
> > > return -EPROBE_DEFER;
> > >
> > > and remove everything else, no need for any kind of retry or delayed
> > > work if the driver core takes care of it.
> > >
> > Wouldn't returning -EPROBE_DEFER make more sense in driver probe
> context?
> 
> Yes, you are right. I was rushing to this suggestion without thinking at this
> properly.
> 
> > Here, the driver probe registers an hci interface
> > (hci_register_dev()), and returns success to kernel.
> >
> > The hci_register_dev() queues hdev->power_on work at the end, which
> > opens the hci dev, and ultimately calls this setup function.
> >
> > In this patch, we are queueing the same work from the delayed
> > setup_retry_work().
> >
> > Returning -ENODEV (or -EPROBE_DEFER) would only affect hci_dev_open(),
> > which is in power_on work context, and not driver probe context.
> >
> > Perhaps, we should call it hci_retry_power_on() work or something
> > similar?
> >
> > Please let me know your thoughts on this.
> 
> Do you see any way to get rid of this complexity? Maybe having this check
> done during probe, deferring there till we know the device is in a suitable
> state (e.g. either you received the bootloader signature, you know the device
> is powered or that the firmware is loaded and ready?).
> 
> In other words returning EPROBE_DEFER before calling hci_register_dev()?
> 
Unfortunately no. To read any bootloader signatures, or read CTS line, we need serdev device opened, which is done only after hci_register_dev() -> power_on work -> hci_dev_open()->serdev_open().

Re-scheduling power_on work would open the serdev, check for bootloader signatures and CTS line, and if no chip detected, it will return error, closing serdev device.
 
> With that said I still see an issue if the firmware is loaded by the wifi driver
> and the BT driver start sending commands before the firware load procedure
> is concluded and the firmware is ready. Not sure if you have a way to wait for
> this "firmware ready" state.
After FW is downloaded, no matter if it is BT-only FW or Combo FW downloaded by WiFi driver, the FW has a bunch of it's own initializations. Only when FW is ready to accept commands, it pulls the CTS line low.
So driver would still be re-scheduling power_on work at the moment FW is downloaded, but not ready to accept commands.

We could simplify this by only returning -ENODEV, without the delayed_work handling, but then user would have to remove and re-insert the btnxpuart driver after mwifiex driver is loaded successfully. This may not seam like a user-friendly approach.
Please let me know your thoughts on this.

Thanks,
Neeraj
Francesco Dolcini Jan. 17, 2024, 11:19 a.m. UTC | #6
On Wed, Jan 17, 2024 at 11:11:44AM +0000, Neeraj Sanjay Kale wrote:
> > > Here, the driver probe registers an hci interface
> > > (hci_register_dev()), and returns success to kernel.
> > >
> > > The hci_register_dev() queues hdev->power_on work at the end, which
> > > opens the hci dev, and ultimately calls this setup function.
> > >
> > > In this patch, we are queueing the same work from the delayed
> > > setup_retry_work().
> > >
> > > Returning -ENODEV (or -EPROBE_DEFER) would only affect hci_dev_open(),
> > > which is in power_on work context, and not driver probe context.
> > >
> > > Perhaps, we should call it hci_retry_power_on() work or something
> > > similar?
> > >
> > > Please let me know your thoughts on this.
> > 
> > Do you see any way to get rid of this complexity? Maybe having this check
> > done during probe, deferring there till we know the device is in a suitable
> > state (e.g. either you received the bootloader signature, you know the device
> > is powered or that the firmware is loaded and ready?).
> > 
> > In other words returning EPROBE_DEFER before calling hci_register_dev()?
> > 
> Unfortunately no. To read any bootloader signatures, or read CTS line,
> we need serdev device opened, which is done only after
> hci_register_dev() -> power_on work -> hci_dev_open()->serdev_open().

Ok, thanks, it makes sense and it's clear.

> We could simplify this by only returning -ENODEV, without the
> delayed_work handling, but then user would have to remove and
> re-insert the btnxpuart driver after mwifiex driver is loaded
> successfully.
>
> This may not seam like a user-friendly approach.

I think that something like that is pretty much useless, manual
reloading is already possible, and this can also be solved
"artificially" serializing loading the wifi driver and the bt one (the
latter is what we are currently doing to overcome this limitation).

Francesco
diff mbox series

Patch

diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 7f88b6f52f26..20a3a5bd5529 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -171,6 +171,7 @@  struct btnxpuart_dev {
 	bool timeout_changed;
 	bool baudrate_changed;
 	bool helper_downloaded;
+	struct delayed_work setup_retry_work;
 
 	struct ps_data psdata;
 	struct btnxpuart_data *nxp_data;
@@ -240,6 +241,8 @@  struct v3_start_ind {
 	u8 crc;
 } __packed;
 
+#define NXP_SETUP_RETRY_TIME_MS	5000
+
 /* UART register addresses of BT chip */
 #define CLKDIVADDR	0x7f00008f
 #define UARTDIVADDR	0x7f000090
@@ -1008,6 +1011,15 @@  static int nxp_check_boot_sign(struct btnxpuart_dev *nxpdev)
 					       msecs_to_jiffies(1000));
 }
 
+static void nxp_setup_retry_work(struct work_struct *work)
+{
+	struct btnxpuart_dev *nxpdev = container_of(work, struct btnxpuart_dev,
+						    setup_retry_work.work);
+	struct hci_dev *hdev = nxpdev->hdev;
+
+	queue_work(hdev->req_workqueue, &hdev->power_on);
+}
+
 static int nxp_set_ind_reset(struct hci_dev *hdev, void *data)
 {
 	static const u8 ir_hw_err[] = { HCI_EV_HARDWARE_ERROR,
@@ -1036,6 +1048,13 @@  static int nxp_setup(struct hci_dev *hdev)
 		err = nxp_download_firmware(hdev);
 		if (err < 0)
 			return err;
+	} else if (!serdev_device_get_cts(nxpdev->serdev)) {
+		/* CTS is high and no bootloader signatures detected */
+		bt_dev_dbg(hdev, "Controller not detected. Will check again in %d msec",
+			   NXP_SETUP_RETRY_TIME_MS);
+		schedule_delayed_work(&nxpdev->setup_retry_work,
+				      msecs_to_jiffies(NXP_SETUP_RETRY_TIME_MS));
+		return -ENODEV;
 	} else {
 		bt_dev_dbg(hdev, "FW already running.");
 		clear_bit(BTNXPUART_FW_DOWNLOADING, &nxpdev->tx_state);
@@ -1373,6 +1392,7 @@  static int nxp_serdev_probe(struct serdev_device *serdev)
 	}
 
 	ps_setup(hdev);
+	INIT_DELAYED_WORK(&nxpdev->setup_retry_work, nxp_setup_retry_work);
 
 	return 0;
 }
@@ -1391,6 +1411,7 @@  static void nxp_serdev_remove(struct serdev_device *serdev)
 		nxp_set_baudrate_cmd(hdev, NULL);
 	}
 
+	cancel_delayed_work_sync(&nxpdev->setup_retry_work);
 	ps_cancel_timer(nxpdev);
 	hci_unregister_dev(hdev);
 	hci_free_dev(hdev);