@@ -34,3 +34,63 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
set_bit(PDSC_S_FW_DEAD, &pdsc->state);
}
+
+static void pdsc_fw_down(struct pdsc *pdsc)
+{
+ mutex_lock(&pdsc->config_lock);
+
+ if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: already happening\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+
+ mutex_unlock(&pdsc->config_lock);
+}
+
+static void pdsc_fw_up(struct pdsc *pdsc)
+{
+ int err;
+
+ mutex_lock(&pdsc->config_lock);
+
+ if (!test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
+ mutex_unlock(&pdsc->config_lock);
+ return;
+ }
+
+ err = pdsc_setup(pdsc, PDSC_SETUP_RECOVERY);
+ if (err)
+ goto err_out;
+
+ mutex_unlock(&pdsc->config_lock);
+
+ return;
+
+err_out:
+ pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+ mutex_unlock(&pdsc->config_lock);
+}
+
+void pdsc_health_thread(struct work_struct *work)
+{
+ struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
+ bool healthy;
+
+ healthy = pdsc_is_fw_good(pdsc);
+ dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %d\n",
+ __func__, healthy, pdsc->fw_status, pdsc->last_hb);
+
+ if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+ if (healthy)
+ pdsc_fw_up(pdsc);
+ } else {
+ if (!healthy)
+ pdsc_fw_down(pdsc);
+ }
+
+ pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+}
@@ -12,6 +12,8 @@
#include <linux/pds/pds_intr.h>
#define PDSC_DRV_DESCRIPTION "AMD/Pensando Core Driver"
+
+#define PDSC_WATCHDOG_SECS 5
#define PDSC_TEARDOWN_RECOVERY false
#define PDSC_TEARDOWN_REMOVING true
#define PDSC_SETUP_RECOVERY false
@@ -63,12 +65,17 @@ struct pdsc {
u8 fw_generation;
unsigned long last_fw_time;
u32 last_hb;
+ struct timer_list wdtimer;
+ unsigned int wdtimer_period;
+ struct work_struct health_work;
struct pdsc_devinfo dev_info;
struct pds_core_dev_identity dev_ident;
unsigned int nintrs;
struct pdsc_intr_info *intr_info; /* array of nintrs elements */
+ struct workqueue_struct *wq;
+
unsigned int devcmd_timeout;
struct mutex devcmd_lock; /* lock for dev_cmd operations */
struct mutex config_lock; /* lock for configuration operations */
@@ -81,6 +88,8 @@ struct pdsc {
u64 __iomem *kern_dbpage;
};
+void pdsc_queue_health_check(struct pdsc *pdsc);
+
#ifdef CONFIG_DEBUG_FS
void pdsc_debugfs_create(void);
void pdsc_debugfs_destroy(void);
@@ -111,5 +120,6 @@ int pdsc_dev_init(struct pdsc *pdsc);
int pdsc_setup(struct pdsc *pdsc, bool init);
void pdsc_teardown(struct pdsc *pdsc, bool removing);
+void pdsc_health_thread(struct work_struct *work);
#endif /* _PDSC_H_ */
@@ -177,6 +177,9 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd,
err = pdsc_devcmd_wait(pdsc, max_seconds);
memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
+ if (err == -ENXIO || err == -ETIMEDOUT)
+ pdsc_queue_health_check(pdsc);
+
return err;
}
@@ -27,6 +27,31 @@ static const struct pci_device_id pdsc_id_table[] = {
};
MODULE_DEVICE_TABLE(pci, pdsc_id_table);
+void pdsc_queue_health_check(struct pdsc *pdsc)
+{
+ unsigned long mask;
+
+ /* Don't do a check when in a transition state */
+ mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
+ BIT_ULL(PDSC_S_STOPPING_DRIVER);
+ if (pdsc->state & mask)
+ return;
+
+ /* Queue a new health check if one isn't already queued */
+ queue_work(pdsc->wq, &pdsc->health_work);
+}
+
+static void pdsc_wdtimer_cb(struct timer_list *t)
+{
+ struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
+
+ dev_dbg(pdsc->dev, "%s: jiffies %ld\n", __func__, jiffies);
+ mod_timer(&pdsc->wdtimer,
+ round_jiffies(jiffies + pdsc->wdtimer_period));
+
+ pdsc_queue_health_check(pdsc);
+}
+
static void pdsc_unmap_bars(struct pdsc *pdsc)
{
struct pdsc_dev_bar *bars = pdsc->bars;
@@ -137,8 +162,11 @@ static int pdsc_init_vf(struct pdsc *vf)
return -1;
}
+#define PDSC_WQ_NAME_LEN 24
+
static int pdsc_init_pf(struct pdsc *pdsc)
{
+ char wq_name[PDSC_WQ_NAME_LEN];
struct devlink *dl;
int err;
@@ -155,6 +183,13 @@ static int pdsc_init_pf(struct pdsc *pdsc)
if (err)
goto err_out_release_regions;
+ /* General workqueue and timer, but don't start timer yet */
+ snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->id);
+ pdsc->wq = create_singlethread_workqueue(wq_name);
+ INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+ timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+ pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
+
mutex_init(&pdsc->devcmd_lock);
mutex_init(&pdsc->config_lock);
@@ -172,10 +207,19 @@ static int pdsc_init_pf(struct pdsc *pdsc)
devl_register(dl);
devl_unlock(dl);
+ /* Lastly, start the health check timer */
+ mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
+
return 0;
err_out_unmap_bars:
mutex_unlock(&pdsc->config_lock);
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
mutex_destroy(&pdsc->config_lock);
mutex_destroy(&pdsc->devcmd_lock);
pci_free_irq_vectors(pdsc->pdev);
@@ -277,6 +321,13 @@ static void pdsc_remove(struct pci_dev *pdev)
devl_unlock(dl);
if (!pdev->is_virtfn) {
+ del_timer_sync(&pdsc->wdtimer);
+ if (pdsc->wq) {
+ flush_workqueue(pdsc->wq);
+ destroy_workqueue(pdsc->wq);
+ pdsc->wq = NULL;
+ }
+
mutex_lock(&pdsc->config_lock);
set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
Add in the periodic health check and the related workqueue, as well as the handlers for when a FW reset is seen. Signed-off-by: Shannon Nelson <shannon.nelson@amd.com> --- drivers/net/ethernet/amd/pds_core/core.c | 60 ++++++++++++++++++++++++ drivers/net/ethernet/amd/pds_core/core.h | 10 ++++ drivers/net/ethernet/amd/pds_core/dev.c | 3 ++ drivers/net/ethernet/amd/pds_core/main.c | 51 ++++++++++++++++++++ 4 files changed, 124 insertions(+)