diff mbox series

[1/1] switchtec: Fix stdev_release crash after suprise device loss.

Message ID 20231110113512.83254-2-dns@arista.com (mailing list archive)
State Superseded
Headers show
Series switchtec: Fix stdev_release crash after suprise device loss. | expand

Commit Message

Daniel Stodden Nov. 10, 2023, 11:35 a.m. UTC
A pci device hot removal may occur while stdev->cdev is held open. The
call to stdev_release is then delivered during close or exit, at a
point way past switchtec_pci_remove. Otherwise the last ref would
vanish with the trailing put_device, just before return.

At that later point in time, the device layer has alreay removed
stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
counted one. Therefore, in dma mode, the iowrite32 in stdev_release
will cause a fatal page fault, and the subsequent dma_free_coherent,
if reached, would pass a stale &stdev->pdev->dev pointer.

Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
stdev_kill. Counting the stdev->pdev ref is now optional, but may
prevent future accidents.

Signed-off-by: Daniel Stodden <dns@arista.com>
---
 drivers/pci/switch/switchtec.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index e69cac84b605..002d0205d263 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1247,17 +1247,17 @@  static void enable_dma_mrpc(struct switchtec_dev *stdev)
 	iowrite32(SWITCHTEC_DMA_MRPC_EN, &stdev->mmio_mrpc->dma_en);
 }
 
+static void disable_dma_mrpc(struct switchtec_dev *stdev)
+{
+	iowrite32(0, &stdev->mmio_mrpc->dma_en);
+	flush_wc_buf(stdev);
+	writeq(0, &stdev->mmio_mrpc->dma_addr);
+}
+
 static void stdev_release(struct device *dev)
 {
 	struct switchtec_dev *stdev = to_stdev(dev);
 
-	if (stdev->dma_mrpc) {
-		iowrite32(0, &stdev->mmio_mrpc->dma_en);
-		flush_wc_buf(stdev);
-		writeq(0, &stdev->mmio_mrpc->dma_addr);
-		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
-				stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
-	}
 	kfree(stdev);
 }
 
@@ -1301,7 +1301,7 @@  static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
 		return ERR_PTR(-ENOMEM);
 
 	stdev->alive = true;
-	stdev->pdev = pdev;
+	stdev->pdev = pci_dev_get(pdev);
 	INIT_LIST_HEAD(&stdev->mrpc_queue);
 	mutex_init(&stdev->mrpc_mutex);
 	stdev->mrpc_busy = 0;
@@ -1587,6 +1587,16 @@  static int switchtec_init_pci(struct switchtec_dev *stdev,
 	return 0;
 }
 
+static void switchtec_exit_pci(struct switchtec_dev *stdev)
+{
+	if (stdev->dma_mrpc) {
+		disable_dma_mrpc(stdev);
+		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
+				  stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
+		stdev->dma_mrpc = NULL;
+	}
+}
+
 static int switchtec_pci_probe(struct pci_dev *pdev,
 			       const struct pci_device_id *id)
 {
@@ -1646,6 +1656,9 @@  static void switchtec_pci_remove(struct pci_dev *pdev)
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
 	dev_info(&stdev->dev, "unregistered.\n");
 	stdev_kill(stdev);
+	switchtec_exit_pci(stdev);
+	pci_dev_put(stdev->pdev);
+	stdev->pdev = NULL;
 	put_device(&stdev->dev);
 }