Message ID | 20231110201917.89016-2-dns@arista.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | switchtec: Fix stdev_release crash after suprise device loss. | expand |
On 2023-11-10 13:19, Daniel Stodden wrote: > A pci device hot removal may occur while stdev->cdev is held open. The > call to stdev_release is then delivered during close or exit, at a > point way past switchtec_pci_remove. Otherwise the last ref would > vanish with the trailing put_device, just before return. > > At that later point in time, the device layer has alreay removed > stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a > counted one. Therefore, in dma mode, the iowrite32 in stdev_release > will cause a fatal page fault, and the subsequent dma_free_coherent, > if reached, would pass a stale &stdev->pdev->dev pointer. > > Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after > stdev_kill. Counting the stdev->pdev ref is now optional, but may > prevent future accidents. > > Signed-off-by: Daniel Stodden <dns@arista.com> Nice catch, thanks! The solution looks good to me, though I might quibble slightly about style: I'm not sure why we need two helper functions (disable_dma_mrpc() and switchtec_exit_pci()) that are only called in one place. I'd probably just open code both of them. Other than that: Reviewed-by: Logan Gunthorpe <logang@deltatee.com> Logan > --- > drivers/pci/switch/switchtec.c | 29 +++++++++++++++++++++-------- > 1 file changed, 21 insertions(+), 8 deletions(-) > > diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c > index e69cac84b605..002d0205d263 100644 > --- a/drivers/pci/switch/switchtec.c > +++ b/drivers/pci/switch/switchtec.c > @@ -1247,17 +1247,17 @@ static void enable_dma_mrpc(struct switchtec_dev *stdev) > iowrite32(SWITCHTEC_DMA_MRPC_EN, &stdev->mmio_mrpc->dma_en); > } > > +static void disable_dma_mrpc(struct switchtec_dev *stdev) > +{ > + iowrite32(0, &stdev->mmio_mrpc->dma_en); > + flush_wc_buf(stdev); > + writeq(0, &stdev->mmio_mrpc->dma_addr); > +} > + > static void stdev_release(struct device *dev) > { > struct switchtec_dev *stdev = to_stdev(dev); > > - if (stdev->dma_mrpc) { > - iowrite32(0, &stdev->mmio_mrpc->dma_en); > - flush_wc_buf(stdev); > - writeq(0, &stdev->mmio_mrpc->dma_addr); > - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), > - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); > - } > kfree(stdev); > } > > @@ -1301,7 +1301,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) > return ERR_PTR(-ENOMEM); > > stdev->alive = true; > - stdev->pdev = pdev; > + stdev->pdev = pci_dev_get(pdev); > INIT_LIST_HEAD(&stdev->mrpc_queue); > mutex_init(&stdev->mrpc_mutex); > stdev->mrpc_busy = 0; > @@ -1587,6 +1587,16 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, > return 0; > } > > +static void switchtec_exit_pci(struct switchtec_dev *stdev) > +{ > + if (stdev->dma_mrpc) { > + disable_dma_mrpc(stdev); > + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), > + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); > + stdev->dma_mrpc = NULL; > + } > +} > + > static int switchtec_pci_probe(struct pci_dev *pdev, > const struct pci_device_id *id) > { > @@ -1646,6 +1656,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) > ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); > dev_info(&stdev->dev, "unregistered.\n"); > stdev_kill(stdev); > + switchtec_exit_pci(stdev); > + pci_dev_put(stdev->pdev); > + stdev->pdev = NULL; > put_device(&stdev->dev); > } >
Hi. [Sorry, sending this twice, again, hoping this time it won't bounce, because html] > Nice catch, thanks! > > The solution looks good to me, though I might quibble slightly about > style: I'm not sure why we need two helper functions (disable_dma_mrpc() > and switchtec_exit_pci()) that are only called in one place. I'd > probably just open code both of them. > > Other than that: > > Reviewed-by: Logan Gunthorpe <logang@deltatee.com> I'm fine with it either way. I added those mainly because of the enable_dma_mrpc/switctec_init_pci counterparts were already there. And likewise single-use, as would be usual. So the subroutines would maintain symmetry between probe() and remove(). So.. just don't add the new remove()-relevant ones? (One alternative would be to send another change right after, which goes after single-use pairs altogether.) Daniel > Logan > >> --- >> drivers/pci/switch/switchtec.c | 29 +++++++++++++++++++++-------- >> 1 file changed, 21 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c >> index e69cac84b605..002d0205d263 100644 >> --- a/drivers/pci/switch/switchtec.c >> +++ b/drivers/pci/switch/switchtec.c >> @@ -1247,17 +1247,17 @@ static void enable_dma_mrpc(struct switchtec_dev *stdev) >> iowrite32(SWITCHTEC_DMA_MRPC_EN, &stdev->mmio_mrpc->dma_en); >> } >> >> +static void disable_dma_mrpc(struct switchtec_dev *stdev) >> +{ >> + iowrite32(0, &stdev->mmio_mrpc->dma_en); >> + flush_wc_buf(stdev); >> + writeq(0, &stdev->mmio_mrpc->dma_addr); >> +} >> + >> static void stdev_release(struct device *dev) >> { >> struct switchtec_dev *stdev = to_stdev(dev); >> >> - if (stdev->dma_mrpc) { >> - iowrite32(0, &stdev->mmio_mrpc->dma_en); >> - flush_wc_buf(stdev); >> - writeq(0, &stdev->mmio_mrpc->dma_addr); >> - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), >> - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); >> - } >> kfree(stdev); >> } >> >> @@ -1301,7 +1301,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) >> return ERR_PTR(-ENOMEM); >> >> stdev->alive = true; >> - stdev->pdev = pdev; >> + stdev->pdev = pci_dev_get(pdev); >> INIT_LIST_HEAD(&stdev->mrpc_queue); >> mutex_init(&stdev->mrpc_mutex); >> stdev->mrpc_busy = 0; >> @@ -1587,6 +1587,16 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, >> return 0; >> } >> >> +static void switchtec_exit_pci(struct switchtec_dev *stdev) >> +{ >> + if (stdev->dma_mrpc) { >> + disable_dma_mrpc(stdev); >> + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), >> + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); >> + stdev->dma_mrpc = NULL; >> + } >> +} >> + >> static int switchtec_pci_probe(struct pci_dev *pdev, >> const struct pci_device_id *id) >> { >> @@ -1646,6 +1656,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) >> ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); >> dev_info(&stdev->dev, "unregistered.\n"); >> stdev_kill(stdev); >> + switchtec_exit_pci(stdev); >> + pci_dev_put(stdev->pdev); >> + stdev->pdev = NULL; >> put_device(&stdev->dev); >> }
On 2023-11-10 14:30, Daniel Stodden wrote: > > Hi. > > [Sorry, sending this twice, again, hoping this time it won't bounce, because html] > >> Nice catch, thanks! >> >> The solution looks good to me, though I might quibble slightly about >> style: I'm not sure why we need two helper functions (disable_dma_mrpc() >> and switchtec_exit_pci()) that are only called in one place. I'd >> probably just open code both of them. >> >> Other than that: >> >> Reviewed-by: Logan Gunthorpe <logang@deltatee.com> > > I'm fine with it either way. I added those mainly because of the enable_dma_mrpc/switctec_init_pci > counterparts were already there. And likewise single-use, as would be usual. > > So the subroutines would maintain symmetry between probe() and remove(). > > So.. just don't add the new remove()-relevant ones? (One alternative would be to send another change right after, > which goes after single-use pairs altogether.) Ah, I see. Just because it's single use doesn't necessarily mean it's wrong. I would say switchtec_init_pci() should not be inlined because it's rather long and keeps switchtec_pci_probe() a little more readable. enable_dma_mrpc() more questionable. I'm not sure why it was done that way. Perhaps something changed. In any case, I'm not all that concerned. If you want to leave it the way it is, I wouldn't mind. Thanks, Logan
I don’t mind all. I’ll remove disable_dma_mrpc(). That will make things about the size as the old stdev_release() again. It’s pretty gratuitous. I’ll keep switchtec_exit_pci(), because it helps switchtec_pci_remove to remain a carefully ordered sequence of one-liners. I looked a little more through the code, and noticed that the two non-delayed work structs, mrpc_work and mrpc_timeout, are not cancelled in stdev_kill(). We seem to stop the ISR which schedules them, but do not stop the bh before teardown. And these handlers do not recognize stdev->alive (ok, they should not, because they do not hold a strong reference to stdev.) So there’s probably more coming soon-ish. Just mentioning it in case I get hit by a piano until then. Cheers, Daniel > On Nov 10, 2023, at 2:49 PM, Logan Gunthorpe <logang@deltatee.com> wrote: > > > > On 2023-11-10 14:30, Daniel Stodden wrote: >> >> Hi. >> >> [Sorry, sending this twice, again, hoping this time it won't bounce, because html] >> >>> Nice catch, thanks! >>> >>> The solution looks good to me, though I might quibble slightly about >>> style: I'm not sure why we need two helper functions (disable_dma_mrpc() >>> and switchtec_exit_pci()) that are only called in one place. I'd >>> probably just open code both of them. >>> >>> Other than that: >>> >>> Reviewed-by: Logan Gunthorpe <logang@deltatee.com> >> >> I'm fine with it either way. I added those mainly because of the enable_dma_mrpc/switctec_init_pci >> counterparts were already there. And likewise single-use, as would be usual. >> >> So the subroutines would maintain symmetry between probe() and remove(). >> >> So.. just don't add the new remove()-relevant ones? (One alternative would be to send another change right after, >> which goes after single-use pairs altogether.) > > Ah, I see. Just because it's single use doesn't necessarily mean it's > wrong. I would say switchtec_init_pci() should not be inlined because > it's rather long and keeps switchtec_pci_probe() a little more readable. > > enable_dma_mrpc() more questionable. I'm not sure why it was done that > way. Perhaps something changed. > > In any case, I'm not all that concerned. If you want to leave it the way > it is, I wouldn't mind. > > Thanks, > > Logan
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index e69cac84b605..002d0205d263 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1247,17 +1247,17 @@ static void enable_dma_mrpc(struct switchtec_dev *stdev) iowrite32(SWITCHTEC_DMA_MRPC_EN, &stdev->mmio_mrpc->dma_en); } +static void disable_dma_mrpc(struct switchtec_dev *stdev) +{ + iowrite32(0, &stdev->mmio_mrpc->dma_en); + flush_wc_buf(stdev); + writeq(0, &stdev->mmio_mrpc->dma_addr); +} + static void stdev_release(struct device *dev) { struct switchtec_dev *stdev = to_stdev(dev); - if (stdev->dma_mrpc) { - iowrite32(0, &stdev->mmio_mrpc->dma_en); - flush_wc_buf(stdev); - writeq(0, &stdev->mmio_mrpc->dma_addr); - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); - } kfree(stdev); } @@ -1301,7 +1301,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return ERR_PTR(-ENOMEM); stdev->alive = true; - stdev->pdev = pdev; + stdev->pdev = pci_dev_get(pdev); INIT_LIST_HEAD(&stdev->mrpc_queue); mutex_init(&stdev->mrpc_mutex); stdev->mrpc_busy = 0; @@ -1587,6 +1587,16 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, return 0; } +static void switchtec_exit_pci(struct switchtec_dev *stdev) +{ + if (stdev->dma_mrpc) { + disable_dma_mrpc(stdev); + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); + stdev->dma_mrpc = NULL; + } +} + static int switchtec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1646,6 +1656,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); stdev_kill(stdev); + switchtec_exit_pci(stdev); + pci_dev_put(stdev->pdev); + stdev->pdev = NULL; put_device(&stdev->dev); }
A pci device hot removal may occur while stdev->cdev is held open. The call to stdev_release is then delivered during close or exit, at a point way past switchtec_pci_remove. Otherwise the last ref would vanish with the trailing put_device, just before return. At that later point in time, the device layer has alreay removed stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a counted one. Therefore, in dma mode, the iowrite32 in stdev_release will cause a fatal page fault, and the subsequent dma_free_coherent, if reached, would pass a stale &stdev->pdev->dev pointer. Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after stdev_kill. Counting the stdev->pdev ref is now optional, but may prevent future accidents. Signed-off-by: Daniel Stodden <dns@arista.com> --- drivers/pci/switch/switchtec.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-)