@@ -41,3 +41,63 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
set_bit(PDSC_S_FW_DEAD, &pdsc->state);
}
+
+static void pdsc_fw_down(struct pdsc *pdsc)
+{
+ mutex_lock(&pdsc->config_lock);
+
+ if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: already happening\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+
+ mutex_unlock(&pdsc->config_lock);
+}
+
+static void pdsc_fw_up(struct pdsc *pdsc)
+{
+ int err;
+
+ mutex_lock(&pdsc->config_lock);
+
+ if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
+ if (err)
+ goto err_out;
+
+ mutex_unlock(&pdsc->config_lock);
+
+ return;
+
+err_out:
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+ mutex_unlock(&pdsc->config_lock);
+}
+
+void pdsc_health_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
+ bool healthy;
+
+ healthy = pdsc_is_fw_good(pdsc);
+ dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
+ __func__, healthy, pdsc->fw_status, pdsc->last_hb);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ if (healthy)
+ pdsc_fw_up(pdsc);
+ } else {
+ if (!healthy)
+ pdsc_fw_down(pdsc);
+ }
+
+ pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+}
@@ -12,6 +12,8 @@
#include <linux/pds/pds_intr.h>
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core PF Driver"
+
+#define PDSC_WATCHDOG_SECS 5
#define PDSC_TEARDOWN_RECOVERY false
#define PDSC_TEARDOWN_REMOVING true
#define PDSC_SETUP_RECOVERY false
@@ -64,12 +66,17 @@ struct pdsc {
u8 fw_generation;
unsigned long last_fw_time;
u32 last_hb;
+ struct timer_list wdtimer;
+ unsigned int wdtimer_period;
+ struct work_struct health_work;
struct pdsc_devinfo dev_info;
struct pds_core_dev_identity dev_ident;
unsigned int nintrs;
struct pdsc_intr_info *intr_info; /* array of nintrs elements */
+ struct workqueue_struct *wq;
+
unsigned int devcmd_timeout;
struct mutex devcmd_lock; /* lock for dev_cmd operations */
struct mutex config_lock; /* lock for configuration operations */
@@ -82,6 +89,7 @@ struct pdsc {
u64 __iomem *kern_dbpage;
};
+void pdsc_queue_health_check(struct pdsc *pdsc);
void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num);
struct pdsc *pdsc_dl_alloc(struct device *dev);
@@ -119,5 +127,6 @@ int pdsc_dev_init(struct pdsc *pdsc);
int pdsc_setup(struct pdsc *pdsc, bool init);
void pdsc_teardown(struct pdsc *pdsc, bool removing);
+void pdsc_health_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
@@ -181,6 +181,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
err = pdsc_devcmd_wait(pdsc, max_seconds);
memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
+ if (err == -ENXIO || err == -ETIMEDOUT)
+ pdsc_queue_health_check(pdsc);
+
return err;
}
@@ -25,6 +25,31 @@ static const struct pci_device_id pdsc_id_table[] = {
};
MODULE_DEVICE_TABLE(pci, pdsc_id_table);
+void pdsc_queue_health_check(struct pdsc *pdsc)
+{
+ unsigned long mask;
+
+ /* Don't do a check when in a transition state */
+ mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
+ BIT_ULL(PDSC_S_STOPPING_DRIVER);
+ if (pdsc->state & mask)
+ return;
+
+ /* Queue a new health check if one isn't already queued */
+ queue_work(pdsc->wq, &pdsc->health_work);
+}
+
+static void pdsc_wdtimer_cb(struct timer_list *t)
+{
+ struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
+
+ dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
+ mod_timer(&pdsc->wdtimer,
+ round_jiffies(jiffies + pdsc->wdtimer_period));
+
+ pdsc_queue_health_check(pdsc);
+}
+
static void pdsc_unmap_bars(struct pdsc *pdsc)
{
struct pdsc_dev_bar *bars = pdsc->bars;
@@ -135,9 +160,12 @@ static int pdsc_map_bars(struct pdsc *pdsc)
static DEFINE_IDA(pdsc_pf_ida);
+#define PDSC_WQ_NAME_LEN 24
+
static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
+ char wq_name[PDSC_WQ_NAME_LEN];
struct pdsc *pdsc;
int err;
@@ -189,6 +217,13 @@ static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_out_pci_disable_device;
+ /* General workqueue and timer, but don't start timer yet */
+ snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->id);
+ pdsc->wq = create_singlethread_workqueue(wq_name);
+ INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+ pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
+
/* PDS device setup */
mutex_init(&pdsc->devcmd_lock);
mutex_init(&pdsc->config_lock);
@@ -209,6 +244,8 @@ static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pdsc->fw_generation = PDS_CORE_FW_STS_F_GENERATION &
ioread8(&pdsc->info_regs->fw_status);
+ /* Lastly, start the health check timer */
+ mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
clear_bit(PDSC_S_INITING_DRIVER, &pdsc->state);
return 0;
@@ -216,6 +253,12 @@ static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err_out:
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
err_out_unmap_bars:
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
mutex_unlock(&pdsc->config_lock);
mutex_destroy(&pdsc->config_lock);
mutex_destroy(&pdsc->devcmd_lock);
@@ -248,6 +291,13 @@ static void pdsc_remove(struct pci_dev *pdev)
mutex_lock(&pdsc->config_lock);
set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
+
/* Device teardown */
pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
pdsc_debugfs_del_dev(pdsc);
Add in the periodic health check and the related workqueue, as well as the handlers for when a FW reset is seen. Signed-off-by: Shannon Nelson <shannon.nelson@amd.com> --- drivers/net/ethernet/amd/pds_core/core.c | 60 ++++++++++++++++++++++++ drivers/net/ethernet/amd/pds_core/core.h | 9 ++++ drivers/net/ethernet/amd/pds_core/dev.c | 3 ++ drivers/net/ethernet/amd/pds_core/main.c | 50 ++++++++++++++++++++ 4 files changed, 122 insertions(+)