diff mbox series

[v3] switchtec: Fix stdev_release crash after suprise device loss.

Message ID 20231113212150.96410-2-dns@arista.com (mailing list archive)
State Accepted
Delegated to: Bjorn Helgaas
Headers show
Series [v3] switchtec: Fix stdev_release crash after suprise device loss. | expand

Commit Message

Daniel Stodden Nov. 13, 2023, 9:21 p.m. UTC
A pci device hot removal may occur while stdev->cdev is held open. The
call to stdev_release is then delivered during close or exit, at a
point way past switchtec_pci_remove. Otherwise the last ref would
vanish with the trailing put_device, just before return.

At that later point in time, the device layer has alreay removed
stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
counted one. Therefore, in dma mode, the iowrite32 in stdev_release
will cause a fatal page fault, and the subsequent dma_free_coherent,
if reached, would pass a stale &stdev->pdev->dev pointer.

Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
stdev_kill. Counting the stdev->pdev ref is now optional, but may
prevent future accidents.

Signed-off-by: Daniel Stodden <dns@arista.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
---
 drivers/pci/switch/switchtec.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

Comments

Bjorn Helgaas Nov. 20, 2023, 9:25 p.m. UTC | #1
On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
> A pci device hot removal may occur while stdev->cdev is held open. The
> call to stdev_release is then delivered during close or exit, at a
> point way past switchtec_pci_remove. Otherwise the last ref would
> vanish with the trailing put_device, just before return.
> 
> At that later point in time, the device layer has alreay removed
> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a

I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?

> counted one. Therefore, in dma mode, the iowrite32 in stdev_release
> will cause a fatal page fault, and the subsequent dma_free_coherent,
> if reached, would pass a stale &stdev->pdev->dev pointer.
> 
> Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
> stdev_kill. Counting the stdev->pdev ref is now optional, but may
> prevent future accidents.
> 
> Signed-off-by: Daniel Stodden <dns@arista.com>
> Reviewed-by: Logan Gunthorpe <logang@deltatee.com>

Thanks, applied to "switchtec" for v6.8.

> ---
>  drivers/pci/switch/switchtec.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
> index e69cac84b605..d8718acdb85f 100644
> --- a/drivers/pci/switch/switchtec.c
> +++ b/drivers/pci/switch/switchtec.c
> @@ -1251,13 +1251,6 @@ static void stdev_release(struct device *dev)
>  {
>  	struct switchtec_dev *stdev = to_stdev(dev);
>  
> -	if (stdev->dma_mrpc) {
> -		iowrite32(0, &stdev->mmio_mrpc->dma_en);
> -		flush_wc_buf(stdev);
> -		writeq(0, &stdev->mmio_mrpc->dma_addr);
> -		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
> -				stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
> -	}
>  	kfree(stdev);
>  }
>  
> @@ -1301,7 +1294,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
>  		return ERR_PTR(-ENOMEM);
>  
>  	stdev->alive = true;
> -	stdev->pdev = pdev;
> +	stdev->pdev = pci_dev_get(pdev);
>  	INIT_LIST_HEAD(&stdev->mrpc_queue);
>  	mutex_init(&stdev->mrpc_mutex);
>  	stdev->mrpc_busy = 0;
> @@ -1587,6 +1580,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
>  	return 0;
>  }
>  
> +static void switchtec_exit_pci(struct switchtec_dev *stdev)
> +{
> +	if (stdev->dma_mrpc) {
> +		iowrite32(0, &stdev->mmio_mrpc->dma_en);
> +		flush_wc_buf(stdev);
> +		writeq(0, &stdev->mmio_mrpc->dma_addr);
> +		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
> +				  stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
> +		stdev->dma_mrpc = NULL;
> +	}
> +}
> +
>  static int switchtec_pci_probe(struct pci_dev *pdev,
>  			       const struct pci_device_id *id)
>  {
> @@ -1646,6 +1651,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
>  	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
>  	dev_info(&stdev->dev, "unregistered.\n");
>  	stdev_kill(stdev);
> +	switchtec_exit_pci(stdev);
> +	pci_dev_put(stdev->pdev);
> +	stdev->pdev = NULL;
>  	put_device(&stdev->dev);
>  }
>  
> -- 
> 2.41.0
>
Daniel Stodden Nov. 21, 2023, 6:23 p.m. UTC | #2
Hi.

> On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
>> A pci device hot removal may occur while stdev->cdev is held open. The
>> call to stdev_release is then delivered during close or exit, at a
>> point way past switchtec_pci_remove. Otherwise the last ref would
>> vanish with the trailing put_device, just before return.
>> 
>> At that later point in time, the device layer has alreay removed
>> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
> 
> I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?

Eww. My fault. Could you still correct that?

> 
>> counted one. Therefore, in dma mode, the iowrite32 in stdev_release
>> will cause a fatal page fault, and the subsequent dma_free_coherent,
>> if reached, would pass a stale &stdev->pdev->dev pointer.
>> 
>> Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
>> stdev_kill. Counting the stdev->pdev ref is now optional, but may
>> prevent future accidents.
>> 
>> Signed-off-by: Daniel Stodden <dns@arista.com>
>> Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
> 
> Thanks, applied to "switchtec" for v6.8

Thank you.

Daniel

>> ---
>> drivers/pci/switch/switchtec.c | 24 ++++++++++++++++--------
>> 1 file changed, 16 insertions(+), 8 deletions(-)
>> 
>> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
>> index e69cac84b605..d8718acdb85f 100644
>> --- a/drivers/pci/switch/switchtec.c
>> +++ b/drivers/pci/switch/switchtec.c
>> @@ -1251,13 +1251,6 @@ static void stdev_release(struct device *dev)
>> {
>> struct switchtec_dev *stdev = to_stdev(dev);
>> 
>> - if (stdev->dma_mrpc) {
>> - iowrite32(0, &stdev->mmio_mrpc->dma_en);
>> - flush_wc_buf(stdev);
>> - writeq(0, &stdev->mmio_mrpc->dma_addr);
>> - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
>> - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
>> - }
>> kfree(stdev);
>> }
>> 
>> @@ -1301,7 +1294,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
>> return ERR_PTR(-ENOMEM);
>> 
>> stdev->alive = true;
>> - stdev->pdev = pdev;
>> + stdev->pdev = pci_dev_get(pdev);
>> INIT_LIST_HEAD(&stdev->mrpc_queue);
>> mutex_init(&stdev->mrpc_mutex);
>> stdev->mrpc_busy = 0;
>> @@ -1587,6 +1580,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
>> return 0;
>> }
>> 
>> +static void switchtec_exit_pci(struct switchtec_dev *stdev)
>> +{
>> + if (stdev->dma_mrpc) {
>> + iowrite32(0, &stdev->mmio_mrpc->dma_en);
>> + flush_wc_buf(stdev);
>> + writeq(0, &stdev->mmio_mrpc->dma_addr);
>> + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
>> +  stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
>> + stdev->dma_mrpc = NULL;
>> + }
>> +}
>> +
>> static int switchtec_pci_probe(struct pci_dev *pdev,
>>       const struct pci_device_id *id)
>> {
>> @@ -1646,6 +1651,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
>> ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
>> dev_info(&stdev->dev, "unregistered.\n");
>> stdev_kill(stdev);
>> + switchtec_exit_pci(stdev);
>> + pci_dev_put(stdev->pdev);
>> + stdev->pdev = NULL;
>> put_device(&stdev->dev);
>> }
>> 
>> -- 
>> 2.41.0
>>
Bjorn Helgaas Nov. 21, 2023, 6:40 p.m. UTC | #3
On Tue, Nov 21, 2023 at 10:23:51AM -0800, Daniel Stodden wrote:
> > On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
> >> A pci device hot removal may occur while stdev->cdev is held open. The
> >> call to stdev_release is then delivered during close or exit, at a
> >> point way past switchtec_pci_remove. Otherwise the last ref would
> >> vanish with the trailing put_device, just before return.
> >> 
> >> At that later point in time, the device layer has alreay removed
> >> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
> > 
> > I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?
> 
> Eww. My fault. Could you still correct that?

Yep, I speculatively made that change already, so thanks for
confirming it :)

https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=switchtec&id=f9724598e29d

Bjorn
Daniel Stodden Nov. 21, 2023, 11:58 p.m. UTC | #4
> On Nov 21, 2023, at 10:40 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Tue, Nov 21, 2023 at 10:23:51AM -0800, Daniel Stodden wrote:
>>> On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
>>>> A pci device hot removal may occur while stdev->cdev is held open. The
>>>> call to stdev_release is then delivered during close or exit, at a
>>>> point way past switchtec_pci_remove. Otherwise the last ref would
>>>> vanish with the trailing put_device, just before return.
>>>> 
>>>> At that later point in time, the device layer has alreay removed
>>>> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
>>> 
>>> I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?
>> 
>> Eww. My fault. Could you still correct that?
> 
> Yep, I speculatively made that change already, so thanks for
> confirming it :)
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=switchtec&id=f9724598e29d

Thanks. And sorry for what’s next. 

Look what I just found in my internal review inbox.

Signed-off/Reviewed-by: dima@arista.com

Want a v4, a follow-up, or can you re-edit that once more too, please?

Thanks + sorry,
Daniel


diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index a82576205..ddaf87e10 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1331,6 +1331,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)

 err_put:
        put_device(&stdev->dev);
+       put_device(&stdev->pdev);
        return ERR_PTR(rc);
 }
Daniel Stodden Nov. 22, 2023, 12:02 a.m. UTC | #5
> On Nov 21, 2023, at 3:58 PM, Daniel Stodden <dns@arista.com> wrote:
> 
> +       put_device(&stdev->pdev);

That was just a sketch. Actuall pci_dev_put.

Daniel
Bjorn Helgaas Nov. 22, 2023, 12:19 a.m. UTC | #6
On Tue, Nov 21, 2023 at 03:58:22PM -0800, Daniel Stodden wrote:
> > On Nov 21, 2023, at 10:40 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Tue, Nov 21, 2023 at 10:23:51AM -0800, Daniel Stodden wrote:
> >>> On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>> On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
> >>>> A pci device hot removal may occur while stdev->cdev is held open. The
> >>>> call to stdev_release is then delivered during close or exit, at a
> >>>> point way past switchtec_pci_remove. Otherwise the last ref would
> >>>> vanish with the trailing put_device, just before return.
> >>>> 
> >>>> At that later point in time, the device layer has alreay removed
> >>>> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
> >>> 
> >>> I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?
> >> 
> >> Eww. My fault. Could you still correct that?
> > 
> > Yep, I speculatively made that change already, so thanks for
> > confirming it :)
> > 
> > https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=switchtec&id=f9724598e29d
> 
> Thanks. And sorry for what’s next. 
> 
> Look what I just found in my internal review inbox.
> 
> Signed-off/Reviewed-by: dima@arista.com

Happy to add that, no problem, but:

  - "Signed-off-by" has a specific meaning about either being involved
    in the creation of the patch or being part of the chain of
    transmitting the patch, see
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=v6.6#n396
    Since I got the patch directly from you, adding a signed-off-by
    from dima@ would mean he created part of the patch.

  - I don't think it makes sense to include both Signed-off-by and
    Reviewed-by from the same person, since the point of Reviewed-by
    is to get review by somebody other than the author.

  - dima@ should include a name as well as the email address (I assume
    "Dmitry Safonov <dima@arista.com>")

So if you want to use a Signed-off-by from Dmitry, please post a v4
including that (ideally starting from the commit above because I
silently fixed a couple other typos (although I missed the
put_device() thing)).

If you prefer a Dmitry's Reviewed-by, no need to post a v4.  The best
thing would be for Dmitry to respond with the Reviewed-by on the
mailing list.  Some people do collect reviews internally, but I prefer
to get them directly from the reviewer.

Bjorn
Dmitry Safonov Nov. 22, 2023, 12:25 a.m. UTC | #7
HI Bjorn,

On 11/22/23 00:19, Bjorn Helgaas wrote:
> On Tue, Nov 21, 2023 at 03:58:22PM -0800, Daniel Stodden wrote:
>>> On Nov 21, 2023, at 10:40 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>> On Tue, Nov 21, 2023 at 10:23:51AM -0800, Daniel Stodden wrote:
>>>>> On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>>>>> On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
>>>>>> A pci device hot removal may occur while stdev->cdev is held open. The
>>>>>> call to stdev_release is then delivered during close or exit, at a
>>>>>> point way past switchtec_pci_remove. Otherwise the last ref would
>>>>>> vanish with the trailing put_device, just before return.
>>>>>>
>>>>>> At that later point in time, the device layer has alreay removed
>>>>>> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
>>>>>
>>>>> I guess this should say the "stdev->mmio_mrpc" (not "mrpc_mmio")?
>>>>
>>>> Eww. My fault. Could you still correct that?
>>>
>>> Yep, I speculatively made that change already, so thanks for
>>> confirming it :)
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git/commit/?h=switchtec&id=f9724598e29d
>>
>> Thanks. And sorry for what’s next. 
>>
>> Look what I just found in my internal review inbox.
>>
>> Signed-off/Reviewed-by: dima@arista.com
> 
> Happy to add that, no problem, but:
> 
>   - "Signed-off-by" has a specific meaning about either being involved
>     in the creation of the patch or being part of the chain of
>     transmitting the patch, see
>     https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?id=v6.6#n396
>     Since I got the patch directly from you, adding a signed-off-by
>     from dima@ would mean he created part of the patch.
> 
>   - I don't think it makes sense to include both Signed-off-by and
>     Reviewed-by from the same person, since the point of Reviewed-by
>     is to get review by somebody other than the author.
> 
>   - dima@ should include a name as well as the email address (I assume
>     "Dmitry Safonov <dima@arista.com>")
> 
> So if you want to use a Signed-off-by from Dmitry, please post a v4
> including that (ideally starting from the commit above because I
> silently fixed a couple other typos (although I missed the
> put_device() thing)).
> 
> If you prefer a Dmitry's Reviewed-by, no need to post a v4.  The best
> thing would be for Dmitry to respond with the Reviewed-by on the
> mailing list.  Some people do collect reviews internally, but I prefer
> to get them directly from the reviewer.

It's fine to drop my SoB. With the fixup above,

Reviewed-by: Dmitry Safonov <dima@arista.com>

Thanks,
            Dmitry
Bjorn Helgaas Nov. 22, 2023, 12:37 a.m. UTC | #8
On Wed, Nov 22, 2023 at 12:25:48AM +0000, Dmitry Safonov wrote:
> On 11/22/23 00:19, Bjorn Helgaas wrote:
> > On Tue, Nov 21, 2023 at 03:58:22PM -0800, Daniel Stodden wrote:
> >>> On Nov 21, 2023, at 10:40 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>> On Tue, Nov 21, 2023 at 10:23:51AM -0800, Daniel Stodden wrote:
> >>>>> On Nov 20, 2023, at 1:25 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> >>>>> On Mon, Nov 13, 2023 at 01:21:50PM -0800, Daniel Stodden wrote:
> >>>>>> A pci device hot removal may occur while stdev->cdev is held open. The
> >>>>>> call to stdev_release is then delivered during close or exit, at a
> >>>>>> point way past switchtec_pci_remove. Otherwise the last ref would
> >>>>>> vanish with the trailing put_device, just before return.
 
> It's fine to drop my SoB. With the fixup above,
> 
> Reviewed-by: Dmitry Safonov <dima@arista.com>

Done, thanks!
Dmitry Safonov Nov. 22, 2023, 12:37 a.m. UTC | #9
On 11/13/23 21:21, Daniel Stodden wrote:
> A pci device hot removal may occur while stdev->cdev is held open. The
> call to stdev_release is then delivered during close or exit, at a
> point way past switchtec_pci_remove. Otherwise the last ref would
> vanish with the trailing put_device, just before return.
> 
> At that later point in time, the device layer has alreay removed
> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
> counted one. Therefore, in dma mode, the iowrite32 in stdev_release
> will cause a fatal page fault, and the subsequent dma_free_coherent,
> if reached, would pass a stale &stdev->pdev->dev pointer.
> 
> Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
> stdev_kill. Counting the stdev->pdev ref is now optional, but may
> prevent future accidents.
> 
> Signed-off-by: Daniel Stodden <dns@arista.com>
> Reviewed-by: Logan Gunthorpe <logang@deltatee.com>

Just in case, duplicating on the patch.
With pci_dev_put(stdev->pdev) on stdev_create() err-path,

Reviewed-by: Dmitry Safonov <dima@arista.com>

> ---
>  drivers/pci/switch/switchtec.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
> index e69cac84b605..d8718acdb85f 100644
> --- a/drivers/pci/switch/switchtec.c
> +++ b/drivers/pci/switch/switchtec.c
> @@ -1251,13 +1251,6 @@ static void stdev_release(struct device *dev)
>  {
>  	struct switchtec_dev *stdev = to_stdev(dev);
>  
> -	if (stdev->dma_mrpc) {
> -		iowrite32(0, &stdev->mmio_mrpc->dma_en);
> -		flush_wc_buf(stdev);
> -		writeq(0, &stdev->mmio_mrpc->dma_addr);
> -		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
> -				stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
> -	}
>  	kfree(stdev);
>  }
>  
> @@ -1301,7 +1294,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
>  		return ERR_PTR(-ENOMEM);
>  
>  	stdev->alive = true;
> -	stdev->pdev = pdev;
> +	stdev->pdev = pci_dev_get(pdev);
>  	INIT_LIST_HEAD(&stdev->mrpc_queue);
>  	mutex_init(&stdev->mrpc_mutex);
>  	stdev->mrpc_busy = 0;
> @@ -1587,6 +1580,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev,
>  	return 0;
>  }
>  
> +static void switchtec_exit_pci(struct switchtec_dev *stdev)
> +{
> +	if (stdev->dma_mrpc) {
> +		iowrite32(0, &stdev->mmio_mrpc->dma_en);
> +		flush_wc_buf(stdev);
> +		writeq(0, &stdev->mmio_mrpc->dma_addr);
> +		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
> +				  stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
> +		stdev->dma_mrpc = NULL;
> +	}
> +}
> +
>  static int switchtec_pci_probe(struct pci_dev *pdev,
>  			       const struct pci_device_id *id)
>  {
> @@ -1646,6 +1651,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev)
>  	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
>  	dev_info(&stdev->dev, "unregistered.\n");
>  	stdev_kill(stdev);
> +	switchtec_exit_pci(stdev);
> +	pci_dev_put(stdev->pdev);
> +	stdev->pdev = NULL;
>  	put_device(&stdev->dev);
>  }
>  

Thanks,
             Dmitry
Bjorn Helgaas Nov. 22, 2023, 12:39 a.m. UTC | #10
On Tue, Nov 21, 2023 at 04:02:12PM -0800, Daniel Stodden wrote:
> > On Nov 21, 2023, at 3:58 PM, Daniel Stodden <dns@arista.com> wrote:
> > 
> > +       put_device(&stdev->pdev);
> 
> That was just a sketch. Actuall pci_dev_put.

Can you clarify what I should do with this?  The current commit log
reads like this:

  A PCI device hot removal may occur while stdev->cdev is held open. The call
  to stdev_release() then happens during close or exit, at a point way past
  switchtec_pci_remove(). Otherwise the last ref would vanish with the
  trailing put_device(), just before return.

Are you saying it needs a change?

Bjorn
Bjorn Helgaas Nov. 22, 2023, 12:41 a.m. UTC | #11
On Wed, Nov 22, 2023 at 12:37:33AM +0000, Dmitry Safonov wrote:
> On 11/13/23 21:21, Daniel Stodden wrote:
> > A pci device hot removal may occur while stdev->cdev is held open. The
> > call to stdev_release is then delivered during close or exit, at a
> > point way past switchtec_pci_remove. Otherwise the last ref would
> > vanish with the trailing put_device, just before return.
> > 
> > At that later point in time, the device layer has alreay removed
> > stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
> > counted one. Therefore, in dma mode, the iowrite32 in stdev_release
> > will cause a fatal page fault, and the subsequent dma_free_coherent,
> > if reached, would pass a stale &stdev->pdev->dev pointer.
> > 
> > Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
> > stdev_kill. Counting the stdev->pdev ref is now optional, but may
> > prevent future accidents.
> > 
> > Signed-off-by: Daniel Stodden <dns@arista.com>
> > Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
> 
> Just in case, duplicating on the patch.
> With pci_dev_put(stdev->pdev) on stdev_create() err-path,
> 
> Reviewed-by: Dmitry Safonov <dima@arista.com>

OK, I'm totally lost.  Please post a v4 with the content you want.

Bjorn
Daniel Stodden Nov. 22, 2023, 1:02 a.m. UTC | #12
I apologize for the confusion. 

Re-starting this thread wasn’t much about adding more reviewer names.

What I meant was what came out of his review:

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 073d0f7f5e43..b1990bde688c 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1387,6 +1387,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)

 err_put:
        put_device(&stdev->dev);
+       pci_dev_put(stdev->pdev);
        return ERR_PTR(rc);
 }


I probably should not’ have put that patch fragment at the far end of my email, past the signature.

If you prefer a v4, I’ll make a v4.

Sorry again,
Daniel

> On Nov 21, 2023, at 4:41 PM, Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> On Wed, Nov 22, 2023 at 12:37:33AM +0000, Dmitry Safonov wrote:
>> On 11/13/23 21:21, Daniel Stodden wrote:
>>> A pci device hot removal may occur while stdev->cdev is held open. The
>>> call to stdev_release is then delivered during close or exit, at a
>>> point way past switchtec_pci_remove. Otherwise the last ref would
>>> vanish with the trailing put_device, just before return.
>>> 
>>> At that later point in time, the device layer has alreay removed
>>> stdev->mrpc_mmio map. Also, the stdev->pdev reference was not a
>>> counted one. Therefore, in dma mode, the iowrite32 in stdev_release
>>> will cause a fatal page fault, and the subsequent dma_free_coherent,
>>> if reached, would pass a stale &stdev->pdev->dev pointer.
>>> 
>>> Fixed by moving mrpc dma shutdown into switchtec_pci_remove, after
>>> stdev_kill. Counting the stdev->pdev ref is now optional, but may
>>> prevent future accidents.
>>> 
>>> Signed-off-by: Daniel Stodden <dns@arista.com>
>>> Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
>> 
>> Just in case, duplicating on the patch.
>> With pci_dev_put(stdev->pdev) on stdev_create() err-path,
>> 
>> Reviewed-by: Dmitry Safonov <dima@arista.com>
> 
> OK, I'm totally lost.  Please post a v4 with the content you want.
> 
> Bjorn
Bjorn Helgaas Nov. 22, 2023, 3:16 a.m. UTC | #13
On Tue, Nov 21, 2023 at 05:02:22PM -0800, Daniel Stodden wrote:
> 
> I apologize for the confusion. 
> 
> Re-starting this thread wasn’t much about adding more reviewer names.
> 
> What I meant was what came out of his review:
> 
> diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
> index 073d0f7f5e43..b1990bde688c 100644
> --- a/drivers/pci/switch/switchtec.c
> +++ b/drivers/pci/switch/switchtec.c
> @@ -1387,6 +1387,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
> 
>  err_put:
>         put_device(&stdev->dev);
> +       pci_dev_put(stdev->pdev);
>         return ERR_PTR(rc);
>  }
> 
> 
> I probably should not’ have put that patch fragment at the far end of my email, past the signature.
> 
> If you prefer a v4, I’ll make a v4.

Yeah, just post a v4 if you don't mind.  Another version is free, and
then there's no confusion about what you intend :)
diff mbox series

Patch

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index e69cac84b605..d8718acdb85f 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1251,13 +1251,6 @@  static void stdev_release(struct device *dev)
 {
 	struct switchtec_dev *stdev = to_stdev(dev);
 
-	if (stdev->dma_mrpc) {
-		iowrite32(0, &stdev->mmio_mrpc->dma_en);
-		flush_wc_buf(stdev);
-		writeq(0, &stdev->mmio_mrpc->dma_addr);
-		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
-				stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
-	}
 	kfree(stdev);
 }
 
@@ -1301,7 +1294,7 @@  static struct switchtec_dev *stdev_create(struct pci_dev *pdev)
 		return ERR_PTR(-ENOMEM);
 
 	stdev->alive = true;
-	stdev->pdev = pdev;
+	stdev->pdev = pci_dev_get(pdev);
 	INIT_LIST_HEAD(&stdev->mrpc_queue);
 	mutex_init(&stdev->mrpc_mutex);
 	stdev->mrpc_busy = 0;
@@ -1587,6 +1580,18 @@  static int switchtec_init_pci(struct switchtec_dev *stdev,
 	return 0;
 }
 
+static void switchtec_exit_pci(struct switchtec_dev *stdev)
+{
+	if (stdev->dma_mrpc) {
+		iowrite32(0, &stdev->mmio_mrpc->dma_en);
+		flush_wc_buf(stdev);
+		writeq(0, &stdev->mmio_mrpc->dma_addr);
+		dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc),
+				  stdev->dma_mrpc, stdev->dma_mrpc_dma_addr);
+		stdev->dma_mrpc = NULL;
+	}
+}
+
 static int switchtec_pci_probe(struct pci_dev *pdev,
 			       const struct pci_device_id *id)
 {
@@ -1646,6 +1651,9 @@  static void switchtec_pci_remove(struct pci_dev *pdev)
 	ida_simple_remove(&switchtec_minor_ida, MINOR(stdev->dev.devt));
 	dev_info(&stdev->dev, "unregistered.\n");
 	stdev_kill(stdev);
+	switchtec_exit_pci(stdev);
+	pci_dev_put(stdev->pdev);
+	stdev->pdev = NULL;
 	put_device(&stdev->dev);
 }