@@ -21,6 +21,46 @@ struct auxiliary_device_id pds_vfio_aux_id_table[] = {
{},
};
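+/* Handle a device recovery event by requesting a deferred reset into
+ * VFIO_DEVICE_STATE_ERROR when a migration could be affected
+ */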
+static void
+pds_vfio_recovery_work(struct work_struct *work)
+{
+ struct pds_vfio_aux *vfio_aux =
+ container_of(work, struct pds_vfio_aux, work);
+ struct pds_vfio_pci_device *pds_vfio = vfio_aux->pds_vfio;
+ bool deferred_reset_needed = false;
+
+ /* Documentation states that the kernel migration driver must not
+ * generate asynchronous device state transitions outside of
+ * manipulation by the user or the VFIO_DEVICE_RESET ioctl.
+ *
+ * Since recovery is an asynchronous event received from the device,
+ * initiate a deferred reset. Issue a deferred reset in the following
+ * situations:
+ * 1. Migration is in progress, which will cause the next step of
+ * the migration to fail.
+ * 2. If the device is in a state that will be set to
+ * VFIO_DEVICE_STATE_RUNNING on the next action (i.e. VM is shutdown
+ * and device is in VFIO_DEVICE_STATE_STOP).
+ */
+ mutex_lock(&pds_vfio->state_mutex);
+ if ((pds_vfio->state != VFIO_DEVICE_STATE_RUNNING &&
+ pds_vfio->state != VFIO_DEVICE_STATE_ERROR) ||
+ (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING &&
+ pds_vfio_dirty_is_enabled(pds_vfio)))
+ deferred_reset_needed = true;
+ mutex_unlock(&pds_vfio->state_mutex);
+
+ /* On the next user initiated state transition, the device will
+ * transition to VFIO_DEVICE_STATE_ERROR. At that point it's the
+ * user's responsibility to reset the device.
+ *
+ * If a VFIO_DEVICE_RESET is requested post recovery and before the next
+ * state transition, then the deferred reset state will be set to
+ * VFIO_DEVICE_STATE_RUNNING.
+ */
+ if (deferred_reset_needed)
+ pds_vfio_deferred_reset(pds_vfio, VFIO_DEVICE_STATE_ERROR);
+}
+
static void
pds_vfio_aux_notify_handler(struct pds_auxiliary_dev *padev,
union pds_core_notifyq_comp *event)
@@ -29,6 +69,23 @@ pds_vfio_aux_notify_handler(struct pds_auxiliary_dev *padev,
u16 ecode = le16_to_cpu(event->ecode);
dev_dbg(dev, "%s: event code %d\n", __func__, ecode);
+
+ /* We don't need to do anything for RESET state==0 as there is no notify
+ * or feedback mechanism available, and it is possible that we won't
+ * even see a state==0 event.
+ *
+ * Any requests from VFIO while state==0 will fail, which will return
+ * an error and may cause the migration to fail.
+ */
+ if (ecode == PDS_EVENT_RESET) {
+ dev_info(dev, "%s: PDS_EVENT_RESET event received, state==%d\n",
+ __func__, event->reset.state);
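+ /* A state==1 event signals that the device has finished its
+ * reset, so schedule the recovery handler
+ */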
+ if (event->reset.state == 1) {
+ struct pds_vfio_aux *vfio_aux = auxiliary_get_drvdata(&padev->aux_dev);
+
+ schedule_work(&vfio_aux->work);
+ }
+ }
}
static int
@@ -88,6 +145,8 @@ pds_vfio_aux_probe(struct auxiliary_device *aux_dev,
goto err_out;
}
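+ /* Set up the work item used to handle device recovery events */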
+ INIT_WORK(&vfio_aux->work, pds_vfio_recovery_work);
+
return 0;
err_out:
@@ -103,6 +162,8 @@ pds_vfio_aux_remove(struct auxiliary_device *aux_dev)
struct pds_vfio_aux *vfio_aux = auxiliary_get_drvdata(aux_dev);
struct pds_vfio_pci_device *pds_vfio = vfio_aux->pds_vfio;
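+ /* Make sure any scheduled recovery work is finished before teardown */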
+ cancel_work_sync(&vfio_aux->work);
+
if (pds_vfio) {
pds_vfio_unregister_client_cmd(pds_vfio);
vfio_aux->pds_vfio->vfio_aux = NULL;
@@ -17,6 +17,7 @@ struct pds_vfio_aux {
struct pds_auxiliary_dev *padev;
struct pds_auxiliary_drv padrv;
struct pds_vfio_pci_device *pds_vfio;
+ struct work_struct work;
};
struct auxiliary_driver *
@@ -26,10 +26,17 @@ pds_vfio_state_mutex_unlock(struct pds_vfio_pci_device *pds_vfio)
if (pds_vfio->deferred_reset) {
pds_vfio->deferred_reset = false;
if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) {
- pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
+ dev_dbg(&pds_vfio->pdev->dev, "Transitioning from VFIO_DEVICE_STATE_ERROR to %s\n",
+ pds_vfio_lm_state(pds_vfio->deferred_reset_state));
+ pds_vfio->state = pds_vfio->deferred_reset_state;
pds_vfio_put_restore_file(pds_vfio);
pds_vfio_put_save_file(pds_vfio);
+ } else if (pds_vfio->deferred_reset_state == VFIO_DEVICE_STATE_ERROR) {
+ dev_dbg(&pds_vfio->pdev->dev, "Transitioning from %s to VFIO_DEVICE_STATE_ERROR based on deferred_reset request\n",
+ pds_vfio_lm_state(pds_vfio->state));
+ pds_vfio->state = VFIO_DEVICE_STATE_ERROR;
}
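+ /* Default the next deferred reset target back to RUNNING; a new
+ * request can override it before the reset is handled
+ */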
+ pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
spin_unlock(&pds_vfio->reset_lock);
goto again;
}
@@ -42,6 +49,7 @@ pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
{
spin_lock(&pds_vfio->reset_lock);
pds_vfio->deferred_reset = true;
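+ /* A user-requested VFIO_DEVICE_RESET always returns the device to RUNNING */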
+ pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
if (!mutex_trylock(&pds_vfio->state_mutex)) {
spin_unlock(&pds_vfio->reset_lock);
return;
@@ -50,6 +58,18 @@ pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio)
pds_vfio_state_mutex_unlock(pds_vfio);
}
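+/* Request a deferred reset into @reset_state; it is applied the next
+ * time pds_vfio_state_mutex_unlock() releases the state_mutex
+ */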
+void
+pds_vfio_deferred_reset(struct pds_vfio_pci_device *pds_vfio,
+ enum vfio_device_mig_state reset_state)
+{
+ dev_info(&pds_vfio->pdev->dev, "Requesting deferred_reset to state %s\n",
+ pds_vfio_lm_state(reset_state));
+ spin_lock(&pds_vfio->reset_lock);
+ pds_vfio->deferred_reset = true;
+ pds_vfio->deferred_reset_state = reset_state;
+ spin_unlock(&pds_vfio->reset_lock);
+}
+
static struct file *
pds_vfio_set_device_state(struct vfio_device *vdev,
enum vfio_device_mig_state new_state)
@@ -63,7 +83,13 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
return ERR_PTR(-ENODEV);
mutex_lock(&pds_vfio->state_mutex);
- while (new_state != pds_vfio->state) {
+ /* The only way to transition out of VFIO_DEVICE_STATE_ERROR is via
+ * VFIO_DEVICE_RESET, so prevent the state machine from running since
+ * vfio_mig_get_next_state() will trigger a WARN_ON() when transitioning
+ * from VFIO_DEVICE_STATE_ERROR to any other state.
+ */
+ while (pds_vfio->state != VFIO_DEVICE_STATE_ERROR &&
+ new_state != pds_vfio->state) {
enum vfio_device_mig_state next_state;
int err = vfio_mig_get_next_state(vdev, pds_vfio->state,
@@ -85,6 +111,9 @@ pds_vfio_set_device_state(struct vfio_device *vdev,
}
}
pds_vfio_state_mutex_unlock(pds_vfio);
+ /* still waiting on a deferred_reset */
+ if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR)
+ res = ERR_PTR(-EIO);
return res;
}
@@ -169,6 +198,7 @@ pds_vfio_open_device(struct vfio_device *vdev)
dev_dbg(&pds_vfio->pdev->dev, "%s: %s => VFIO_DEVICE_STATE_RUNNING\n",
__func__, pds_vfio_lm_state(pds_vfio->state));
pds_vfio->state = VFIO_DEVICE_STATE_RUNNING;
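+ /* Start each open with the deferred reset target defaulted to RUNNING */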
+ pds_vfio->deferred_reset_state = VFIO_DEVICE_STATE_RUNNING;
vfio_pci_core_finish_enable(&pds_vfio->vfio_coredev);
@@ -23,6 +23,7 @@ struct pds_vfio_pci_device {
enum vfio_device_mig_state state;
spinlock_t reset_lock; /* protect reset_done flow */
u8 deferred_reset;
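+ /* state to enter when a deferred reset is handled */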
+ enum vfio_device_mig_state deferred_reset_state;
int vf_id;
int pci_id;
@@ -34,5 +35,8 @@ struct pds_vfio_pci_device *
pds_vfio_pci_drvdata(struct pci_dev *pdev);
void
pds_vfio_reset(struct pds_vfio_pci_device *pds_vfio);
+void
+pds_vfio_deferred_reset(struct pds_vfio_pci_device *pds_vfio,
+ enum vfio_device_mig_state reset_state);
#endif /* _VFIO_DEV_H_ */