Message ID | 1581522508-31337-1-git-send-email-gubbaven@codeaurora.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Marcel Holtmann |
Headers | show |
Series | [v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump | expand |
Hi Venkata, I would suggest removing the memdump_timer entirely and making the ctrl_memdump_timeout into struct delayed_work. Instead of using mod_timer to get the callback ready, you would instead call `queue_delayed_work(qca->workqueue, &qca->ctrl_memdump_timeout, MEMDUMP_TIMEOUT_MS);` and instead of del_timer, you would instead `cancel_delayed_work(&qca->ctrl_memdump_timeout)` if mutex is held or `cancel_delayed_work_sync(&qca->ctrl_memdump_timeout)` if mutex is not held. Other than that, everything else looks good to me. On Wed, Feb 12, 2020 at 7:51 AM Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org> wrote: > > This patch will fix the below issues > 1.Fixed race conditions while accessing memory dump state flags. > 2.Updated with actual context of timer in hci_memdump_timeout() > 3.Updated injecting hardware error event if the dumps failed to receive. > 4.Once timeout is triggered, stopping the memory dump collections. > > Possible scenarios while collecting memory dump: > > Scenario 1: > > Memdump event from firmware > Some number of memdump events with seq # > Hw error event > Reset > > Scenario 2: > > Memdump event from firmware > Some number of memdump events with seq # > Timeout schedules hw_error_event if hw error event is not received already > hw_error_event clears the memdump activity > reset > > Scenario 3: > > hw_error_event sends memdump command to firmware and waits for completion > Some number of memdump events with seq # > hw error event > reset > > Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR") > Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> > Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org> > --- > drivers/bluetooth/hci_qca.c | 96 ++++++++++++++++++++++++++++++++------------- > 1 file changed, 69 insertions(+), 27 deletions(-) > > diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c > index eacc65b..80ee838 100644 > --- a/drivers/bluetooth/hci_qca.c > +++ b/drivers/bluetooth/hci_qca.c > @@ -29,6 +29,7 @@ > #include <linux/platform_device.h> > #include <linux/regulator/consumer.h> > #include <linux/serdev.h> > +#include <linux/mutex.h> > #include <asm/unaligned.h> > > #include <net/bluetooth/bluetooth.h> > @@ -69,7 +70,8 @@ enum qca_flags { > QCA_IBS_ENABLED, > QCA_DROP_VENDOR_EVENT, > QCA_SUSPENDING, > - QCA_MEMDUMP_COLLECTION > + QCA_MEMDUMP_COLLECTION, > + QCA_HW_ERROR_EVENT > }; > > > @@ -145,11 +147,13 @@ struct qca_data { > struct work_struct ws_rx_vote_off; > struct work_struct ws_tx_vote_off; > struct work_struct ctrl_memdump_evt; > + struct work_struct ctrl_memdump_timeout; > struct qca_memdump_data *qca_memdump; > unsigned long flags; > struct completion drop_ev_comp; > wait_queue_head_t suspend_wait_q; > enum qca_memdump_states memdump_state; > + struct mutex hci_memdump_lock; > > /* For debugging purpose */ > u64 ibs_sent_wacks; > @@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t) > > static void hci_memdump_timeout(struct timer_list *t) > { > - struct qca_data *qca = from_timer(qca, t, tx_idle_timer); > - struct hci_uart *hu = qca->hu; > - struct qca_memdump_data *qca_memdump = qca->qca_memdump; > - char *memdump_buf = qca_memdump->memdump_buf_tail; > - > - bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); > - /* Inject hw error event to reset the device and driver. */ > - hci_reset_dev(hu->hdev); > - vfree(memdump_buf); > - kfree(qca_memdump); > - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; > + struct qca_data *qca = from_timer(qca, t, memdump_timer); > + > + queue_work(qca->workqueue, &qca->ctrl_memdump_timeout); > del_timer(&qca->memdump_timer); > - cancel_work_sync(&qca->ctrl_memdump_evt); > } > > +static void qca_controller_memdump_timeout(struct work_struct *work) > +{ > + struct qca_data *qca = container_of(work, struct qca_data, > + ctrl_memdump_timeout); > + struct hci_uart *hu = qca->hu; > + > + mutex_lock(&qca->hci_memdump_lock); > + if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) { > + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; > + if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) { > + /* Inject hw error event to reset the device > + * and driver. > + */ > + hci_reset_dev(hu->hdev); > + } > + } > + > + mutex_unlock(&qca->hci_memdump_lock); > +} > + > + > /* Initialize protocol */ > static int qca_open(struct hci_uart *hu) > { > @@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu) > skb_queue_head_init(&qca->tx_wait_q); > skb_queue_head_init(&qca->rx_memdump_q); > spin_lock_init(&qca->hci_ibs_lock); > + mutex_init(&qca->hci_memdump_lock); > qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); > if (!qca->workqueue) { > BT_ERR("QCA Workqueue not initialized properly"); > @@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu) > INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off); > INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); > INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); > + INIT_WORK(&qca->ctrl_memdump_timeout, qca_controller_memdump_timeout); > init_waitqueue_head(&qca->suspend_wait_q); > > qca->hu = hu; > @@ -963,11 +981,20 @@ static void qca_controller_memdump(struct work_struct *work) > > while ((skb = skb_dequeue(&qca->rx_memdump_q))) { > > + mutex_lock(&qca->hci_memdump_lock); > + /* Skip processing the received packets if timeout detected. */ > + if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) { > + mutex_unlock(&qca->hci_memdump_lock); > + return; > + } > + > if (!qca_memdump) { > qca_memdump = kzalloc(sizeof(struct qca_memdump_data), > GFP_ATOMIC); > - if (!qca_memdump) > + if (!qca_memdump) { > + mutex_unlock(&qca->hci_memdump_lock); > return; > + } > > qca->qca_memdump = qca_memdump; > } > @@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct work_struct *work) > if (!(dump_size)) { > bt_dev_err(hu->hdev, "Rx invalid memdump size"); > kfree_skb(skb); > + mutex_unlock(&qca->hci_memdump_lock); > return; > } > > @@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct work_struct *work) > kfree(qca_memdump); > kfree_skb(skb); > qca->qca_memdump = NULL; > + mutex_unlock(&qca->hci_memdump_lock); > return; > } > > @@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct work_struct *work) > kfree(qca->qca_memdump); > qca->qca_memdump = NULL; > qca->memdump_state = QCA_MEMDUMP_COLLECTED; > + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); > } > + > + mutex_unlock(&qca->hci_memdump_lock); > } > > } > > -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb) > +static int qca_controller_memdump_event(struct hci_dev *hdev, > + struct sk_buff *skb) > { > struct hci_uart *hu = hci_get_drvdata(hdev); > struct qca_data *qca = hu->priv; > @@ -1406,30 +1439,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev) > { > struct hci_uart *hu = hci_get_drvdata(hdev); > struct qca_data *qca = hu->priv; > - struct qca_memdump_data *qca_memdump = qca->qca_memdump; > - char *memdump_buf = NULL; > > wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, > TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); > > clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); > - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { > - bt_dev_err(hu->hdev, "Clearing the buffers due to timeout"); > - if (qca_memdump) > - memdump_buf = qca_memdump->memdump_buf_tail; > - vfree(memdump_buf); > - kfree(qca_memdump); > - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; > - del_timer(&qca->memdump_timer); > - cancel_work_sync(&qca->ctrl_memdump_evt); > - } > } > > static void qca_hw_error(struct hci_dev *hdev, u8 code) > { > struct hci_uart *hu = hci_get_drvdata(hdev); > struct qca_data *qca = hu->priv; > + struct qca_memdump_data *qca_memdump = qca->qca_memdump; > + char *memdump_buf = NULL; > > + set_bit(QCA_HW_ERROR_EVENT, &qca->flags); > bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); > > if (qca->memdump_state == QCA_MEMDUMP_IDLE) { > @@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code) > bt_dev_info(hdev, "waiting for dump to complete"); > qca_wait_for_dump_collection(hdev); > } > + > + if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) { > + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); > + mutex_lock(&qca->hci_memdump_lock); > + if (qca_memdump) > + memdump_buf = qca_memdump->memdump_buf_head; > + vfree(memdump_buf); > + kfree(qca_memdump); > + qca->qca_memdump = NULL; > + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; > + del_timer(&qca->memdump_timer); > + skb_queue_purge(&qca->rx_memdump_q); > + mutex_unlock(&qca->hci_memdump_lock); > + cancel_work_sync(&qca->ctrl_memdump_timeout); > + cancel_work_sync(&qca->ctrl_memdump_evt); > + } > + > + clear_bit(QCA_HW_ERROR_EVENT, &qca->flags); > } > > static void qca_cmd_timeout(struct hci_dev *hdev) > -- > QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member > of Code Aurora Forum, hosted by The Linux Foundation > Thanks Abhishek
Hi Abhishek, Sure I will update and post next patch set. Regards, Lakshmi Narayana. On 2020-02-12 22:27, Abhishek Pandit-Subedi wrote: > Hi Venkata, > > I would suggest removing the memdump_timer entirely and making the > ctrl_memdump_timeout into struct delayed_work. > > Instead of using mod_timer to get the callback ready, you would > instead call `queue_delayed_work(qca->workqueue, > &qca->ctrl_memdump_timeout, MEMDUMP_TIMEOUT_MS);` and instead of > del_timer, you would instead > `cancel_delayed_work(&qca->ctrl_memdump_timeout)` if mutex is held or > `cancel_delayed_work_sync(&qca->ctrl_memdump_timeout)` if mutex is not > held. > > Other than that, everything else looks good to me. > > On Wed, Feb 12, 2020 at 7:51 AM Venkata Lakshmi Narayana Gubba > <gubbaven@codeaurora.org> wrote: >> >> This patch will fix the below issues >> 1.Fixed race conditions while accessing memory dump state flags. >> 2.Updated with actual context of timer in hci_memdump_timeout() >> 3.Updated injecting hardware error event if the dumps failed to >> receive. >> 4.Once timeout is triggered, stopping the memory dump collections. >> >> Possible scenarios while collecting memory dump: >> >> Scenario 1: >> >> Memdump event from firmware >> Some number of memdump events with seq # >> Hw error event >> Reset >> >> Scenario 2: >> >> Memdump event from firmware >> Some number of memdump events with seq # >> Timeout schedules hw_error_event if hw error event is not received >> already >> hw_error_event clears the memdump activity >> reset >> >> Scenario 3: >> >> hw_error_event sends memdump command to firmware and waits for >> completion >> Some number of memdump events with seq # >> hw error event >> reset >> >> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory >> dump during SSR") >> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> >> Signed-off-by: Venkata Lakshmi Narayana Gubba >> <gubbaven@codeaurora.org> >> --- >> drivers/bluetooth/hci_qca.c | 96 >> ++++++++++++++++++++++++++++++++------------- >> 1 file changed, 69 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c >> index eacc65b..80ee838 100644 >> --- a/drivers/bluetooth/hci_qca.c >> +++ b/drivers/bluetooth/hci_qca.c >> @@ -29,6 +29,7 @@ >> #include <linux/platform_device.h> >> #include <linux/regulator/consumer.h> >> #include <linux/serdev.h> >> +#include <linux/mutex.h> >> #include <asm/unaligned.h> >> >> #include <net/bluetooth/bluetooth.h> >> @@ -69,7 +70,8 @@ enum qca_flags { >> QCA_IBS_ENABLED, >> QCA_DROP_VENDOR_EVENT, >> QCA_SUSPENDING, >> - QCA_MEMDUMP_COLLECTION >> + QCA_MEMDUMP_COLLECTION, >> + QCA_HW_ERROR_EVENT >> }; >> >> >> @@ -145,11 +147,13 @@ struct qca_data { >> struct work_struct ws_rx_vote_off; >> struct work_struct ws_tx_vote_off; >> struct work_struct ctrl_memdump_evt; >> + struct work_struct ctrl_memdump_timeout; >> struct qca_memdump_data *qca_memdump; >> unsigned long flags; >> struct completion drop_ev_comp; >> wait_queue_head_t suspend_wait_q; >> enum qca_memdump_states memdump_state; >> + struct mutex hci_memdump_lock; >> >> /* For debugging purpose */ >> u64 ibs_sent_wacks; >> @@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct >> timer_list *t) >> >> static void hci_memdump_timeout(struct timer_list *t) >> { >> - struct qca_data *qca = from_timer(qca, t, tx_idle_timer); >> - struct hci_uart *hu = qca->hu; >> - struct qca_memdump_data *qca_memdump = qca->qca_memdump; >> - char *memdump_buf = qca_memdump->memdump_buf_tail; >> - >> - bt_dev_err(hu->hdev, "clearing allocated memory due to memdump >> timeout"); >> - /* Inject hw error event to reset the device and driver. */ >> - hci_reset_dev(hu->hdev); >> - vfree(memdump_buf); >> - kfree(qca_memdump); >> - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; >> + struct qca_data *qca = from_timer(qca, t, memdump_timer); >> + >> + queue_work(qca->workqueue, &qca->ctrl_memdump_timeout); >> del_timer(&qca->memdump_timer); >> - cancel_work_sync(&qca->ctrl_memdump_evt); >> } >> >> +static void qca_controller_memdump_timeout(struct work_struct *work) >> +{ >> + struct qca_data *qca = container_of(work, struct qca_data, >> + ctrl_memdump_timeout); >> + struct hci_uart *hu = qca->hu; >> + >> + mutex_lock(&qca->hci_memdump_lock); >> + if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) { >> + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; >> + if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) { >> + /* Inject hw error event to reset the device >> + * and driver. >> + */ >> + hci_reset_dev(hu->hdev); >> + } >> + } >> + >> + mutex_unlock(&qca->hci_memdump_lock); >> +} >> + >> + >> /* Initialize protocol */ >> static int qca_open(struct hci_uart *hu) >> { >> @@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu) >> skb_queue_head_init(&qca->tx_wait_q); >> skb_queue_head_init(&qca->rx_memdump_q); >> spin_lock_init(&qca->hci_ibs_lock); >> + mutex_init(&qca->hci_memdump_lock); >> qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); >> if (!qca->workqueue) { >> BT_ERR("QCA Workqueue not initialized properly"); >> @@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu) >> INIT_WORK(&qca->ws_rx_vote_off, >> qca_wq_serial_rx_clock_vote_off); >> INIT_WORK(&qca->ws_tx_vote_off, >> qca_wq_serial_tx_clock_vote_off); >> INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); >> + INIT_WORK(&qca->ctrl_memdump_timeout, >> qca_controller_memdump_timeout); >> init_waitqueue_head(&qca->suspend_wait_q); >> >> qca->hu = hu; >> @@ -963,11 +981,20 @@ static void qca_controller_memdump(struct >> work_struct *work) >> >> while ((skb = skb_dequeue(&qca->rx_memdump_q))) { >> >> + mutex_lock(&qca->hci_memdump_lock); >> + /* Skip processing the received packets if timeout >> detected. */ >> + if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) { >> + mutex_unlock(&qca->hci_memdump_lock); >> + return; >> + } >> + >> if (!qca_memdump) { >> qca_memdump = kzalloc(sizeof(struct >> qca_memdump_data), >> GFP_ATOMIC); >> - if (!qca_memdump) >> + if (!qca_memdump) { >> + mutex_unlock(&qca->hci_memdump_lock); >> return; >> + } >> >> qca->qca_memdump = qca_memdump; >> } >> @@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct >> work_struct *work) >> if (!(dump_size)) { >> bt_dev_err(hu->hdev, "Rx invalid >> memdump size"); >> kfree_skb(skb); >> + mutex_unlock(&qca->hci_memdump_lock); >> return; >> } >> >> @@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct >> work_struct *work) >> kfree(qca_memdump); >> kfree_skb(skb); >> qca->qca_memdump = NULL; >> + mutex_unlock(&qca->hci_memdump_lock); >> return; >> } >> >> @@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct >> work_struct *work) >> kfree(qca->qca_memdump); >> qca->qca_memdump = NULL; >> qca->memdump_state = QCA_MEMDUMP_COLLECTED; >> + clear_bit(QCA_MEMDUMP_COLLECTION, >> &qca->flags); >> } >> + >> + mutex_unlock(&qca->hci_memdump_lock); >> } >> >> } >> >> -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff >> *skb) >> +static int qca_controller_memdump_event(struct hci_dev *hdev, >> + struct sk_buff *skb) >> { >> struct hci_uart *hu = hci_get_drvdata(hdev); >> struct qca_data *qca = hu->priv; >> @@ -1406,30 +1439,21 @@ static void >> qca_wait_for_dump_collection(struct hci_dev *hdev) >> { >> struct hci_uart *hu = hci_get_drvdata(hdev); >> struct qca_data *qca = hu->priv; >> - struct qca_memdump_data *qca_memdump = qca->qca_memdump; >> - char *memdump_buf = NULL; >> >> wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, >> TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); >> >> clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); >> - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { >> - bt_dev_err(hu->hdev, "Clearing the buffers due to >> timeout"); >> - if (qca_memdump) >> - memdump_buf = qca_memdump->memdump_buf_tail; >> - vfree(memdump_buf); >> - kfree(qca_memdump); >> - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; >> - del_timer(&qca->memdump_timer); >> - cancel_work_sync(&qca->ctrl_memdump_evt); >> - } >> } >> >> static void qca_hw_error(struct hci_dev *hdev, u8 code) >> { >> struct hci_uart *hu = hci_get_drvdata(hdev); >> struct qca_data *qca = hu->priv; >> + struct qca_memdump_data *qca_memdump = qca->qca_memdump; >> + char *memdump_buf = NULL; >> >> + set_bit(QCA_HW_ERROR_EVENT, &qca->flags); >> bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); >> >> if (qca->memdump_state == QCA_MEMDUMP_IDLE) { >> @@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, >> u8 code) >> bt_dev_info(hdev, "waiting for dump to complete"); >> qca_wait_for_dump_collection(hdev); >> } >> + >> + if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) { >> + bt_dev_err(hu->hdev, "clearing allocated memory due to >> memdump timeout"); >> + mutex_lock(&qca->hci_memdump_lock); >> + if (qca_memdump) >> + memdump_buf = qca_memdump->memdump_buf_head; >> + vfree(memdump_buf); >> + kfree(qca_memdump); >> + qca->qca_memdump = NULL; >> + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; >> + del_timer(&qca->memdump_timer); >> + skb_queue_purge(&qca->rx_memdump_q); >> + mutex_unlock(&qca->hci_memdump_lock); >> + cancel_work_sync(&qca->ctrl_memdump_timeout); >> + cancel_work_sync(&qca->ctrl_memdump_evt); >> + } >> + >> + clear_bit(QCA_HW_ERROR_EVENT, &qca->flags); >> } >> >> static void qca_cmd_timeout(struct hci_dev *hdev) >> -- >> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a >> member >> of Code Aurora Forum, hosted by The Linux Foundation >> > > Thanks > Abhishek
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index eacc65b..80ee838 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -29,6 +29,7 @@ #include <linux/platform_device.h> #include <linux/regulator/consumer.h> #include <linux/serdev.h> +#include <linux/mutex.h> #include <asm/unaligned.h> #include <net/bluetooth/bluetooth.h> @@ -69,7 +70,8 @@ enum qca_flags { QCA_IBS_ENABLED, QCA_DROP_VENDOR_EVENT, QCA_SUSPENDING, - QCA_MEMDUMP_COLLECTION + QCA_MEMDUMP_COLLECTION, + QCA_HW_ERROR_EVENT }; @@ -145,11 +147,13 @@ struct qca_data { struct work_struct ws_rx_vote_off; struct work_struct ws_tx_vote_off; struct work_struct ctrl_memdump_evt; + struct work_struct ctrl_memdump_timeout; struct qca_memdump_data *qca_memdump; unsigned long flags; struct completion drop_ev_comp; wait_queue_head_t suspend_wait_q; enum qca_memdump_states memdump_state; + struct mutex hci_memdump_lock; /* For debugging purpose */ u64 ibs_sent_wacks; @@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t) static void hci_memdump_timeout(struct timer_list *t) { - struct qca_data *qca = from_timer(qca, t, tx_idle_timer); - struct hci_uart *hu = qca->hu; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = qca_memdump->memdump_buf_tail; - - bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); - /* Inject hw error event to reset the device and driver. */ - hci_reset_dev(hu->hdev); - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + struct qca_data *qca = from_timer(qca, t, memdump_timer); + + queue_work(qca->workqueue, &qca->ctrl_memdump_timeout); del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); } +static void qca_controller_memdump_timeout(struct work_struct *work) +{ + struct qca_data *qca = container_of(work, struct qca_data, + ctrl_memdump_timeout); + struct hci_uart *hu = qca->hu; + + mutex_lock(&qca->hci_memdump_lock); + if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) { + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) { + /* Inject hw error event to reset the device + * and driver. + */ + hci_reset_dev(hu->hdev); + } + } + + mutex_unlock(&qca->hci_memdump_lock); +} + + /* Initialize protocol */ static int qca_open(struct hci_uart *hu) { @@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->tx_wait_q); skb_queue_head_init(&qca->rx_memdump_q); spin_lock_init(&qca->hci_ibs_lock); + mutex_init(&qca->hci_memdump_lock); qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); @@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu) INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off); INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off); INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump); + INIT_WORK(&qca->ctrl_memdump_timeout, qca_controller_memdump_timeout); init_waitqueue_head(&qca->suspend_wait_q); qca->hu = hu; @@ -963,11 +981,20 @@ static void qca_controller_memdump(struct work_struct *work) while ((skb = skb_dequeue(&qca->rx_memdump_q))) { + mutex_lock(&qca->hci_memdump_lock); + /* Skip processing the received packets if timeout detected. */ + if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) { + mutex_unlock(&qca->hci_memdump_lock); + return; + } + if (!qca_memdump) { qca_memdump = kzalloc(sizeof(struct qca_memdump_data), GFP_ATOMIC); - if (!qca_memdump) + if (!qca_memdump) { + mutex_unlock(&qca->hci_memdump_lock); return; + } qca->qca_memdump = qca_memdump; } @@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct work_struct *work) if (!(dump_size)) { bt_dev_err(hu->hdev, "Rx invalid memdump size"); kfree_skb(skb); + mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct work_struct *work) kfree(qca_memdump); kfree_skb(skb); qca->qca_memdump = NULL; + mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct work_struct *work) kfree(qca->qca_memdump); qca->qca_memdump = NULL; qca->memdump_state = QCA_MEMDUMP_COLLECTED; + clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); } + + mutex_unlock(&qca->hci_memdump_lock); } } -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb) +static int qca_controller_memdump_event(struct hci_dev *hdev, + struct sk_buff *skb) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; @@ -1406,30 +1439,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; - struct qca_memdump_data *qca_memdump = qca->qca_memdump; - char *memdump_buf = NULL; wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION, TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); - if (qca->memdump_state == QCA_MEMDUMP_IDLE) { - bt_dev_err(hu->hdev, "Clearing the buffers due to timeout"); - if (qca_memdump) - memdump_buf = qca_memdump->memdump_buf_tail; - vfree(memdump_buf); - kfree(qca_memdump); - qca->memdump_state = QCA_MEMDUMP_TIMEOUT; - del_timer(&qca->memdump_timer); - cancel_work_sync(&qca->ctrl_memdump_evt); - } } static void qca_hw_error(struct hci_dev *hdev, u8 code) { struct hci_uart *hu = hci_get_drvdata(hdev); struct qca_data *qca = hu->priv; + struct qca_memdump_data *qca_memdump = qca->qca_memdump; + char *memdump_buf = NULL; + set_bit(QCA_HW_ERROR_EVENT, &qca->flags); bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state); if (qca->memdump_state == QCA_MEMDUMP_IDLE) { @@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code) bt_dev_info(hdev, "waiting for dump to complete"); qca_wait_for_dump_collection(hdev); } + + if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) { + bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout"); + mutex_lock(&qca->hci_memdump_lock); + if (qca_memdump) + memdump_buf = qca_memdump->memdump_buf_head; + vfree(memdump_buf); + kfree(qca_memdump); + qca->qca_memdump = NULL; + qca->memdump_state = QCA_MEMDUMP_TIMEOUT; + del_timer(&qca->memdump_timer); + skb_queue_purge(&qca->rx_memdump_q); + mutex_unlock(&qca->hci_memdump_lock); + cancel_work_sync(&qca->ctrl_memdump_timeout); + cancel_work_sync(&qca->ctrl_memdump_evt); + } + + clear_bit(QCA_HW_ERROR_EVENT, &qca->flags); } static void qca_cmd_timeout(struct hci_dev *hdev)
This patch will fix the below issues 1.Fixed race conditions while accessing memory dump state flags. 2.Updated with actual context of timer in hci_memdump_timeout() 3.Updated injecting hardware error event if the dumps failed to receive. 4.Once timeout is triggered, stopping the memory dump collections. Possible scenarios while collecting memory dump: Scenario 1: Memdump event from firmware Some number of memdump events with seq # Hw error event Reset Scenario 2: Memdump event from firmware Some number of memdump events with seq # Timeout schedules hw_error_event if hw error event is not received already hw_error_event clears the memdump activity reset Scenario 3: hw_error_event sends memdump command to firmware and waits for completion Some number of memdump events with seq # hw error event reset Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR") Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org> --- drivers/bluetooth/hci_qca.c | 96 ++++++++++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 27 deletions(-)