diff mbox series

[net-next,3/5] net: wwan: t7xx: PCIe reset rescan

Message ID 20220816042353.2416956-1-m.chetan.kumar@intel.com (mailing list archive)
State Accepted
Commit 140424d90165d83c75b23ead7c3573bb6dd74f50
Delegated to: Netdev Maintainers
Headers show
Series net: wwan: t7xx: fw flashing & coredump support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1105 this patch: 1190
netdev/cc_maintainers warning 6 maintainers not CCed: linux-arm-kernel@lists.infradead.org edumazet@google.com chiranjeevi.rapolu@linux.intel.com matthias.bgg@gmail.com pabeni@redhat.com linux-mediatek@lists.infradead.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1105 this patch: 1190
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kumar, M Chetan Aug. 16, 2022, 4:23 a.m. UTC
From: Haijun Liu <haijun.liu@mediatek.com>

PCI rescan module implements "rescan work queue". In firmware flashing
or coredump collection procedure WWAN device is programmed to boot in
fastboot mode and a work item is scheduled for removal & detection.
The WWAN device is reset using APCI call as part driver removal flow.
Work queue rescans pci bus at fixed interval for device detection,
later when device is detect work queue exits.

Signed-off-by: Haijun Liu <haijun.liu@mediatek.com>
Co-developed-by: Madhusmita Sahu <madhusmita.sahu@intel.com>
Signed-off-by: Madhusmita Sahu <madhusmita.sahu@intel.com>
Signed-off-by: Ricardo Martinez <ricardo.martinez@linux.intel.com>
Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
Signed-off-by: Devegowda Chandrashekar <chandrashekar.devegowda@intel.com>
---
 drivers/net/wwan/t7xx/Makefile             |   3 +-
 drivers/net/wwan/t7xx/t7xx_modem_ops.c     |   5 +
 drivers/net/wwan/t7xx/t7xx_pci.c           |  51 ++++++++-
 drivers/net/wwan/t7xx/t7xx_pci.h           |   1 +
 drivers/net/wwan/t7xx/t7xx_pci_rescan.c    | 117 +++++++++++++++++++++
 drivers/net/wwan/t7xx/t7xx_pci_rescan.h    |  29 +++++
 drivers/net/wwan/t7xx/t7xx_port_wwan.c     |   6 ++
 drivers/net/wwan/t7xx/t7xx_state_monitor.c |   2 +
 8 files changed, 212 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/wwan/t7xx/t7xx_pci_rescan.c
 create mode 100644 drivers/net/wwan/t7xx/t7xx_pci_rescan.h

Comments

Ilpo Järvinen Aug. 18, 2022, 2:58 p.m. UTC | #1
On Tue, 16 Aug 2022, m.chetan.kumar@intel.com wrote:

> From: Haijun Liu <haijun.liu@mediatek.com>
> 
> PCI rescan module implements "rescan work queue". In firmware flashing
> or coredump collection procedure WWAN device is programmed to boot in
> fastboot mode and a work item is scheduled for removal & detection.
> The WWAN device is reset using APCI call as part driver removal flow.
> Work queue rescans pci bus at fixed interval for device detection,
> later when device is detect work queue exits.
> 
> Signed-off-by: Haijun Liu <haijun.liu@mediatek.com>
> Co-developed-by: Madhusmita Sahu <madhusmita.sahu@intel.com>
> Signed-off-by: Madhusmita Sahu <madhusmita.sahu@intel.com>
> Signed-off-by: Ricardo Martinez <ricardo.martinez@linux.intel.com>
> Signed-off-by: M Chetan Kumar <m.chetan.kumar@linux.intel.com>
> Signed-off-by: Devegowda Chandrashekar <chandrashekar.devegowda@intel.com>
> ---

> +       bool                    hp_enable;

Never written to.

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
> new file mode 100644
> index 000000000000..045777d8a843
> --- /dev/null
> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
> @@ -0,0 +1,117 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2021, MediaTek Inc.
> + * Copyright (c) 2021-2022, Intel Corporation.
> + */
> +
> +#define pr_fmt(fmt) KBUILD_MODNAME ":t7xx:%s: " fmt, __func__
> +#define dev_fmt(fmt) "t7xx: " fmt
> +
> +#include <linux/delay.h>
> +#include <linux/pci.h>
> +#include <linux/spinlock.h>
> +#include <linux/workqueue.h>
> +
> +#include "t7xx_pci.h"
> +#include "t7xx_pci_rescan.h"
> +
> +static struct remove_rescan_context g_mtk_rescan_context;

Any particular reason for this name?

> +void t7xx_pci_dev_rescan(void)
> +{
> +	struct pci_bus *b = NULL;
> +
> +	pci_lock_rescan_remove();
> +	while ((b = pci_find_next_bus(b)))
> +		pci_rescan_bus(b);
> +
> +	pci_unlock_rescan_remove();

I'd remove the empty line to keep the critical sections grouped together. 

> +}
> +
> +void t7xx_rescan_done(void)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +	if (g_mtk_rescan_context.rescan_done == 0) {
> +		pr_debug("this is a rescan probe\n");
> +		g_mtk_rescan_context.rescan_done = 1;
> +	} else {
> +		pr_debug("this is a init probe\n");
> +	}
> +	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +}
> +
> +static void t7xx_remove_rescan(struct work_struct *work)
> +{
> +	struct pci_dev *pdev;
> +	int num_retries = RESCAN_RETRIES;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +	g_mtk_rescan_context.rescan_done = 0;
> +	pdev = g_mtk_rescan_context.dev;
> +	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +
> +	if (pdev) {
> +		pci_stop_and_remove_bus_device_locked(pdev);
> +		pr_debug("start remove and rescan flow\n");
> +	}
> +
> +	do {
> +		t7xx_pci_dev_rescan();
> +		spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +		if (g_mtk_rescan_context.rescan_done) {
> +			spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +			break;
> +		}
> +
> +		spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);

Ditto.

> +		msleep(DELAY_RESCAN_MTIME);
> +	} while (num_retries--);
> +}
> +
> +void t7xx_rescan_queue_work(struct pci_dev *pdev)
> +{
> +	unsigned long flags;
> +
> +	dev_info(&pdev->dev, "start queue_mtk_rescan_work\n");
> +	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +	if (!g_mtk_rescan_context.rescan_done) {
> +		dev_err(&pdev->dev, "rescan failed because last rescan undone\n");

The meaning of the message is hard to understand.

> +		spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +		return;
> +	}
> +
> +	g_mtk_rescan_context.dev = pdev;
> +	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);

Crit section newlines.

> +	queue_work(g_mtk_rescan_context.pcie_rescan_wq, &g_mtk_rescan_context.service_task);
> +}
> +
> +int t7xx_rescan_init(void)
> +{
> +	spin_lock_init(&g_mtk_rescan_context.dev_lock);
> +	g_mtk_rescan_context.rescan_done = 1;
> +	g_mtk_rescan_context.dev = NULL;
> +	g_mtk_rescan_context.pcie_rescan_wq = create_singlethread_workqueue(MTK_RESCAN_WQ);
> +	if (!g_mtk_rescan_context.pcie_rescan_wq) {
> +		pr_err("Failed to create workqueue: %s\n", MTK_RESCAN_WQ);
> +		return -ENOMEM;
> +	}
> +
> +	INIT_WORK(&g_mtk_rescan_context.service_task, t7xx_remove_rescan);
> +
> +	return 0;
> +}
> +
> +void t7xx_rescan_deinit(void)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +	g_mtk_rescan_context.rescan_done = 0;
> +	g_mtk_rescan_context.dev = NULL;
> +	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +	cancel_work_sync(&g_mtk_rescan_context.service_task);
> +	destroy_workqueue(g_mtk_rescan_context.pcie_rescan_wq);
> +}

In general, I felt this whole file was very heavy to read compared with 
the other parts of t7xx code. Maybe it will get better if 
g_mtk_rescan_context becomes shorter and re-newlining is done to better 
indicate the critical sections but we'll see.

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.h b/drivers/net/wwan/t7xx/t7xx_pci_rescan.h
> new file mode 100644
> index 000000000000..de4ca1363bb0
> --- /dev/null
> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.h
> @@ -0,0 +1,29 @@
> +/* SPDX-License-Identifier: GPL-2.0-only
> + *
> + * Copyright (c) 2021, MediaTek Inc.
> + * Copyright (c) 2021-2022, Intel Corporation.
> + */
> +
> +#ifndef __T7XX_PCI_RESCAN_H__
> +#define __T7XX_PCI_RESCAN_H__
> +
> +#define MTK_RESCAN_WQ "mtk_rescan_wq"
> +
> +#define DELAY_RESCAN_MTIME 1000
> +#define RESCAN_RETRIES 35
> +
> +struct remove_rescan_context {
> +	struct work_struct	 service_task;

Extra whitespace.

> +	struct workqueue_struct *pcie_rescan_wq;
> +	spinlock_t		dev_lock; /* protects device */

Perhaps use a tab before the comment instead to make some room.

> +	struct pci_dev		*dev;
> +	int			rescan_done;
> +};
Sergey Ryazanov Aug. 30, 2022, 2:02 a.m. UTC | #2
On Tue, Aug 16, 2022 at 7:12 AM <m.chetan.kumar@intel.com> wrote:
> From: Haijun Liu <haijun.liu@mediatek.com>
>
> PCI rescan module implements "rescan work queue". In firmware flashing
> or coredump collection procedure WWAN device is programmed to boot in
> fastboot mode and a work item is scheduled for removal & detection.
> The WWAN device is reset using APCI call as part driver removal flow.
> Work queue rescans pci bus at fixed interval for device detection,
> later when device is detect work queue exits.

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
> index 871f2a27a398..2f5c6fbe601e 100644
> --- a/drivers/net/wwan/t7xx/t7xx_pci.c
> +++ b/drivers/net/wwan/t7xx/t7xx_pci.c
> @@ -715,8 +716,11 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>                 return ret;
>         }
>
> +       t7xx_rescan_done();
>         t7xx_pcie_mac_set_int(t7xx_dev, MHCCIF_INT);
>         t7xx_pcie_mac_interrupts_en(t7xx_dev);
> +       if (!t7xx_dev->hp_enable)
> +               pci_ignore_hotplug(pdev);

pci_ignore_hotplug() also disables hotplug events for a parent bridge.
Is that how this call was intended?

>
>         return 0;
>  }

[skipped]

> +static void __exit t7xx_pci_cleanup(void)
> +{
> +       int remove_flag = 0;
> +       struct device *dev;
> +
> +       dev = driver_find_device(&t7xx_pci_driver.driver, NULL, NULL, t7xx_always_match);
> +       if (dev) {
> +               pr_debug("unregister t7xx PCIe driver while device is still exist.\n");
> +               put_device(dev);
> +               remove_flag = 1;
> +       } else {
> +               pr_debug("no t7xx PCIe driver found.\n");
> +       }
> +
> +       pci_lock_rescan_remove();
> +       pci_unregister_driver(&t7xx_pci_driver);
> +       pci_unlock_rescan_remove();
> +       t7xx_rescan_deinit();
> +
> +       if (remove_flag) {
> +               pr_debug("remove t7xx PCI device\n");
> +               pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
> +       }

What is the purpose of these operations? Should not the PCI core do
this for us on the driver unregister?

> +}
> +
> +module_exit(t7xx_pci_cleanup);
>
>  MODULE_AUTHOR("MediaTek Inc");
>  MODULE_DESCRIPTION("MediaTek PCIe 5G WWAN modem T7xx driver");

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
> new file mode 100644
> index 000000000000..045777d8a843
> --- /dev/null
> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c

[skipped]

> +void t7xx_pci_dev_rescan(void)
> +{
> +       struct pci_bus *b = NULL;
> +
> +       pci_lock_rescan_remove();
> +       while ((b = pci_find_next_bus(b)))
> +               pci_rescan_bus(b);

The driver does not need to rescan all buses. The device should appear
on the same bus, so the driver just needs to rescan a single and
already known bus.

> +
> +       pci_unlock_rescan_remove();
> +}

[skipped]

> +static void t7xx_remove_rescan(struct work_struct *work)
> +{
> +       struct pci_dev *pdev;
> +       int num_retries = RESCAN_RETRIES;
> +       unsigned long flags;
> +
> +       spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +       g_mtk_rescan_context.rescan_done = 0;
> +       pdev = g_mtk_rescan_context.dev;
> +       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +
> +       if (pdev) {
> +               pci_stop_and_remove_bus_device_locked(pdev);

What is the purpose of removing the device then trying to find it by
rescanning the bus? Would not it be easier to save a PCI device
configuration, reset the device, and then restore the configuration?

The DEVLINK_CMD_RELOAD description states that this command performs
(see include/uapi/linux/devlink.h):

Hot driver reload, makes configuration changes take place. The
devlink instance is not released during the process.

And the devlink_reload() function in net/core/devlink.c is able to
survive the devlink structure memory freeing only by accident. But the
PCI device removing should do exactly that: call the device removing
callback, which will release the devlink instance.

> +               pr_debug("start remove and rescan flow\n");
> +       }
> +
> +       do {
> +               t7xx_pci_dev_rescan();
> +               spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
> +               if (g_mtk_rescan_context.rescan_done) {
> +                       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +                       break;
> +               }
> +
> +               spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
> +               msleep(DELAY_RESCAN_MTIME);
> +       } while (num_retries--);
> +}

[skipped]

> diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> index e53651ee2005..dfd7fb487fc0 100644
> --- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> +++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
> @@ -156,6 +156,12 @@ static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int
>  {
>         const struct t7xx_port_conf *port_conf = port->port_conf;
>
> +       if (state == MD_STATE_EXCEPTION) {
> +               if (port->wwan_port)
> +                       wwan_port_txoff(port->wwan_port);
> +               return;
> +       }
> +

Looks unrelated to the patch description. Does this hunk really belong
to the patch?

>         if (state != MD_STATE_READY)
>                 return;
>

--
Sergey
Kumar, M Chetan Sept. 2, 2022, 4:50 a.m. UTC | #3
On 8/30/2022 7:32 AM, Sergey Ryazanov wrote:
> On Tue, Aug 16, 2022 at 7:12 AM <m.chetan.kumar@intel.com> wrote:
>> From: Haijun Liu <haijun.liu@mediatek.com>
>>
>> PCI rescan module implements "rescan work queue". In firmware flashing
>> or coredump collection procedure WWAN device is programmed to boot in
>> fastboot mode and a work item is scheduled for removal & detection.
>> The WWAN device is reset using APCI call as part driver removal flow.
>> Work queue rescans pci bus at fixed interval for device detection,
>> later when device is detect work queue exits.
> 
> [skipped]
> 
>> diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
>> index 871f2a27a398..2f5c6fbe601e 100644
>> --- a/drivers/net/wwan/t7xx/t7xx_pci.c
>> +++ b/drivers/net/wwan/t7xx/t7xx_pci.c
>> @@ -715,8 +716,11 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>                  return ret;
>>          }
>>
>> +       t7xx_rescan_done();
>>          t7xx_pcie_mac_set_int(t7xx_dev, MHCCIF_INT);
>>          t7xx_pcie_mac_interrupts_en(t7xx_dev);
>> +       if (!t7xx_dev->hp_enable)
>> +               pci_ignore_hotplug(pdev);
> 
> pci_ignore_hotplug() also disables hotplug events for a parent bridge.
> Is that how this call was intended?

I am checking on it. Will get back.

> 
>>
>>          return 0;
>>   }
> 
> [skipped]
> 
>> +static void __exit t7xx_pci_cleanup(void)
>> +{
>> +       int remove_flag = 0;
>> +       struct device *dev;
>> +
>> +       dev = driver_find_device(&t7xx_pci_driver.driver, NULL, NULL, t7xx_always_match);
>> +       if (dev) {
>> +               pr_debug("unregister t7xx PCIe driver while device is still exist.\n");
>> +               put_device(dev);
>> +               remove_flag = 1;
>> +       } else {
>> +               pr_debug("no t7xx PCIe driver found.\n");
>> +       }
>> +
>> +       pci_lock_rescan_remove();
>> +       pci_unregister_driver(&t7xx_pci_driver);
>> +       pci_unlock_rescan_remove();
>> +       t7xx_rescan_deinit();
>> +
>> +       if (remove_flag) {
>> +               pr_debug("remove t7xx PCI device\n");
>> +               pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
>> +       }
> 
> What is the purpose of these operations? Should not the PCI core do
> this for us on the driver unregister?

In removal flow the device need to be reset else the next insmod
would result in device handshake failure. So in driver removal flow
the device is reset and current dev is removed using 
pci_stop_and_remove_bus_device_locked().

If hotplug is disabled on PCI Express Root Port, the dev removal
procedure will not happen on device reset. So driver is explicitly
calling pci_stop_and_remove_bus_device_locked().

> 
>> +}
>> +
>> +module_exit(t7xx_pci_cleanup);
>>
>>   MODULE_AUTHOR("MediaTek Inc");
>>   MODULE_DESCRIPTION("MediaTek PCIe 5G WWAN modem T7xx driver");
> 
> [skipped]
> 
>> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
>> new file mode 100644
>> index 000000000000..045777d8a843
>> --- /dev/null
>> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
> 
> [skipped]
> 
>> +void t7xx_pci_dev_rescan(void)
>> +{
>> +       struct pci_bus *b = NULL;
>> +
>> +       pci_lock_rescan_remove();
>> +       while ((b = pci_find_next_bus(b)))
>> +               pci_rescan_bus(b);
> 
> The driver does not need to rescan all buses. The device should appear
> on the same bus, so the driver just needs to rescan a single and
> already known bus.

On device reset the dev is removed so scanning all buses.

> 
>> +
>> +       pci_unlock_rescan_remove();
>> +}
> 
> [skipped]
> 
>> +static void t7xx_remove_rescan(struct work_struct *work)
>> +{
>> +       struct pci_dev *pdev;
>> +       int num_retries = RESCAN_RETRIES;
>> +       unsigned long flags;
>> +
>> +       spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
>> +       g_mtk_rescan_context.rescan_done = 0;
>> +       pdev = g_mtk_rescan_context.dev;
>> +       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>> +
>> +       if (pdev) {
>> +               pci_stop_and_remove_bus_device_locked(pdev);
> 
> What is the purpose of removing the device then trying to find it by
> rescanning the bus? Would not it be easier to save a PCI device
> configuration, reset the device, and then restore the configuration?

If hotplug is disabled, the device is not removed on reset. So in this 
case driver need to handle the device removal and rescan.

> The DEVLINK_CMD_RELOAD description states that this command performs
> (see include/uapi/linux/devlink.h):
> 
> Hot driver reload, makes configuration changes take place. The
> devlink instance is not released during the process.
> 
> And the devlink_reload() function in net/core/devlink.c is able to
> survive the devlink structure memory freeing only by accident. But the
> PCI device removing should do exactly that: call the device removing
> callback, which will release the devlink instance. >
>> +               pr_debug("start remove and rescan flow\n");
>> +       }
>> +
>> +       do {
>> +               t7xx_pci_dev_rescan();
>> +               spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
>> +               if (g_mtk_rescan_context.rescan_done) {
>> +                       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>> +                       break;
>> +               }
>> +
>> +               spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>> +               msleep(DELAY_RESCAN_MTIME);
>> +       } while (num_retries--);
>> +}
> 
> [skipped]
> 
>> diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
>> index e53651ee2005..dfd7fb487fc0 100644
>> --- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c
>> +++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
>> @@ -156,6 +156,12 @@ static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int
>>   {
>>          const struct t7xx_port_conf *port_conf = port->port_conf;
>>
>> +       if (state == MD_STATE_EXCEPTION) {
>> +               if (port->wwan_port)
>> +                       wwan_port_txoff(port->wwan_port);
>> +               return;
>> +       }
>> +
> 
> Looks unrelated to the patch description. Does this hunk really belong
> to the patch?

Will drop this.

> 
>>          if (state != MD_STATE_READY)
>>                  return;
>>
> 
> --
> Sergey
Sergey Ryazanov Sept. 6, 2022, 10:19 p.m. UTC | #4
On Fri, Sep 2, 2022 at 7:50 AM Kumar, M Chetan
<m.chetan.kumar@linux.intel.com> wrote:
> On 8/30/2022 7:32 AM, Sergey Ryazanov wrote:
>> On Tue, Aug 16, 2022 at 7:12 AM <m.chetan.kumar@intel.com> wrote:
>>> From: Haijun Liu <haijun.liu@mediatek.com>
>>>
>>> PCI rescan module implements "rescan work queue". In firmware flashing
>>> or coredump collection procedure WWAN device is programmed to boot in
>>> fastboot mode and a work item is scheduled for removal & detection.
>>> The WWAN device is reset using APCI call as part driver removal flow.
>>> Work queue rescans pci bus at fixed interval for device detection,
>>> later when device is detect work queue exits.
>>
>> [skipped]
>>
>>> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
>>> new file mode 100644
>>> index 000000000000..045777d8a843
>>> --- /dev/null
>>> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
>>
>> [skipped]
>>
>>> +static void t7xx_remove_rescan(struct work_struct *work)
>>> +{
>>> +       struct pci_dev *pdev;
>>> +       int num_retries = RESCAN_RETRIES;
>>> +       unsigned long flags;
>>> +
>>> +       spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
>>> +       g_mtk_rescan_context.rescan_done = 0;
>>> +       pdev = g_mtk_rescan_context.dev;
>>> +       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>>> +
>>> +       if (pdev) {
>>> +               pci_stop_and_remove_bus_device_locked(pdev);
>>
>> What is the purpose of removing the device then trying to find it by
>> rescanning the bus? Would not it be easier to save a PCI device
>> configuration, reset the device, and then restore the configuration?
>
> If hotplug is disabled, the device is not removed on reset. So in this
> case driver need to handle the device removal and rescan.

I still can not understand this part and need a clue. Why should the
driver disable the hotplug?

And is there a more gentle way to reset the firmware without the
device object removing?

>>> +               pr_debug("start remove and rescan flow\n");
>>> +       }
>>> +
>>> +       do {
>>> +               t7xx_pci_dev_rescan();
>>> +               spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
>>> +               if (g_mtk_rescan_context.rescan_done) {
>>> +                       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>>> +                       break;
>>> +               }
>>> +
>>> +               spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>>> +               msleep(DELAY_RESCAN_MTIME);
>>> +       } while (num_retries--);
>>> +}
Kumar, M Chetan Sept. 9, 2022, 12:20 p.m. UTC | #5
On 9/7/2022 3:49 AM, Sergey Ryazanov wrote:
> On Fri, Sep 2, 2022 at 7:50 AM Kumar, M Chetan
> <m.chetan.kumar@linux.intel.com> wrote:
>> On 8/30/2022 7:32 AM, Sergey Ryazanov wrote:
>>> On Tue, Aug 16, 2022 at 7:12 AM <m.chetan.kumar@intel.com> wrote:
>>>> From: Haijun Liu <haijun.liu@mediatek.com>
>>>>
>>>> PCI rescan module implements "rescan work queue". In firmware flashing
>>>> or coredump collection procedure WWAN device is programmed to boot in
>>>> fastboot mode and a work item is scheduled for removal & detection.
>>>> The WWAN device is reset using APCI call as part driver removal flow.
>>>> Work queue rescans pci bus at fixed interval for device detection,
>>>> later when device is detect work queue exits.
>>>
>>> [skipped]
>>>
>>>> diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
>>>> new file mode 100644
>>>> index 000000000000..045777d8a843
>>>> --- /dev/null
>>>> +++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
>>>
>>> [skipped]
>>>
>>>> +static void t7xx_remove_rescan(struct work_struct *work)
>>>> +{
>>>> +       struct pci_dev *pdev;
>>>> +       int num_retries = RESCAN_RETRIES;
>>>> +       unsigned long flags;
>>>> +
>>>> +       spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
>>>> +       g_mtk_rescan_context.rescan_done = 0;
>>>> +       pdev = g_mtk_rescan_context.dev;
>>>> +       spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
>>>> +
>>>> +       if (pdev) {
>>>> +               pci_stop_and_remove_bus_device_locked(pdev);
>>>
>>> What is the purpose of removing the device then trying to find it by
>>> rescanning the bus? Would not it be easier to save a PCI device
>>> configuration, reset the device, and then restore the configuration?
>>
>> If hotplug is disabled, the device is not removed on reset. So in this
>> case driver need to handle the device removal and rescan.
> 
> I still can not understand this part and need a clue. Why should the
> driver disable the hotplug?

This is a platform configuration, it could be set to enable/disable.
We can find this option in BIOS settings.

> And is there a more gentle way to reset the firmware without the
> device object removing?

Device reset causes WWAN device to fall off the BUS. If device had not 
fallen off the bus then we could have reused.

Without these changes, we need to manually execute device remove & 
rescan commands.

Could you please suggest how can we proceed here ?
diff mbox series

Patch

diff --git a/drivers/net/wwan/t7xx/Makefile b/drivers/net/wwan/t7xx/Makefile
index dc6a7d682c15..df42764b3df8 100644
--- a/drivers/net/wwan/t7xx/Makefile
+++ b/drivers/net/wwan/t7xx/Makefile
@@ -17,4 +17,5 @@  mtk_t7xx-y:=	t7xx_pci.o \
 		t7xx_hif_dpmaif_tx.o \
 		t7xx_hif_dpmaif_rx.o  \
 		t7xx_dpmaif.o \
-		t7xx_netdev.o
+		t7xx_netdev.o \
+		t7xx_pci_rescan.o
diff --git a/drivers/net/wwan/t7xx/t7xx_modem_ops.c b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
index ec2269dfaf2c..fb79d041dbf5 100644
--- a/drivers/net/wwan/t7xx/t7xx_modem_ops.c
+++ b/drivers/net/wwan/t7xx/t7xx_modem_ops.c
@@ -37,6 +37,7 @@ 
 #include "t7xx_modem_ops.h"
 #include "t7xx_netdev.h"
 #include "t7xx_pci.h"
+#include "t7xx_pci_rescan.h"
 #include "t7xx_pcie_mac.h"
 #include "t7xx_port.h"
 #include "t7xx_port_proxy.h"
@@ -192,6 +193,10 @@  static irqreturn_t t7xx_rgu_isr_thread(int irq, void *data)
 
 	msleep(RGU_RESET_DELAY_MS);
 	t7xx_reset_device_via_pmic(t7xx_dev);
+
+	if (!t7xx_dev->hp_enable)
+		t7xx_rescan_queue_work(t7xx_dev->pdev);
+
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c
index 871f2a27a398..2f5c6fbe601e 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.c
+++ b/drivers/net/wwan/t7xx/t7xx_pci.c
@@ -38,6 +38,7 @@ 
 #include "t7xx_mhccif.h"
 #include "t7xx_modem_ops.h"
 #include "t7xx_pci.h"
+#include "t7xx_pci_rescan.h"
 #include "t7xx_pcie_mac.h"
 #include "t7xx_reg.h"
 #include "t7xx_state_monitor.h"
@@ -715,8 +716,11 @@  static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return ret;
 	}
 
+	t7xx_rescan_done();
 	t7xx_pcie_mac_set_int(t7xx_dev, MHCCIF_INT);
 	t7xx_pcie_mac_interrupts_en(t7xx_dev);
+	if (!t7xx_dev->hp_enable)
+		pci_ignore_hotplug(pdev);
 
 	return 0;
 }
@@ -754,7 +758,52 @@  static struct pci_driver t7xx_pci_driver = {
 	.shutdown = t7xx_pci_shutdown,
 };
 
-module_pci_driver(t7xx_pci_driver);
+static int __init t7xx_pci_init(void)
+{
+	int ret;
+
+	t7xx_pci_dev_rescan();
+	ret = t7xx_rescan_init();
+	if (ret) {
+		pr_err("Failed to init t7xx rescan work\n");
+		return ret;
+	}
+
+	return pci_register_driver(&t7xx_pci_driver);
+}
+module_init(t7xx_pci_init);
+
+static int t7xx_always_match(struct device *dev, const void *data)
+{
+	return dev->parent->fwnode == data;
+}
+
+static void __exit t7xx_pci_cleanup(void)
+{
+	int remove_flag = 0;
+	struct device *dev;
+
+	dev = driver_find_device(&t7xx_pci_driver.driver, NULL, NULL, t7xx_always_match);
+	if (dev) {
+		pr_debug("unregister t7xx PCIe driver while device is still exist.\n");
+		put_device(dev);
+		remove_flag = 1;
+	} else {
+		pr_debug("no t7xx PCIe driver found.\n");
+	}
+
+	pci_lock_rescan_remove();
+	pci_unregister_driver(&t7xx_pci_driver);
+	pci_unlock_rescan_remove();
+	t7xx_rescan_deinit();
+
+	if (remove_flag) {
+		pr_debug("remove t7xx PCI device\n");
+		pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
+	}
+}
+
+module_exit(t7xx_pci_cleanup);
 
 MODULE_AUTHOR("MediaTek Inc");
 MODULE_DESCRIPTION("MediaTek PCIe 5G WWAN modem T7xx driver");
diff --git a/drivers/net/wwan/t7xx/t7xx_pci.h b/drivers/net/wwan/t7xx/t7xx_pci.h
index 50b37056ce5a..a87c4cae94ef 100644
--- a/drivers/net/wwan/t7xx/t7xx_pci.h
+++ b/drivers/net/wwan/t7xx/t7xx_pci.h
@@ -69,6 +69,7 @@  struct t7xx_pci_dev {
 	struct t7xx_modem	*md;
 	struct t7xx_ccmni_ctrl	*ccmni_ctlb;
 	bool			rgu_pci_irq_en;
+	bool			hp_enable;
 
 	/* Low Power Items */
 	struct list_head	md_pm_entities;
diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.c b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
new file mode 100644
index 000000000000..045777d8a843
--- /dev/null
+++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.c
@@ -0,0 +1,117 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, MediaTek Inc.
+ * Copyright (c) 2021-2022, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ":t7xx:%s: " fmt, __func__
+#define dev_fmt(fmt) "t7xx: " fmt
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "t7xx_pci.h"
+#include "t7xx_pci_rescan.h"
+
+static struct remove_rescan_context g_mtk_rescan_context;
+
+void t7xx_pci_dev_rescan(void)
+{
+	struct pci_bus *b = NULL;
+
+	pci_lock_rescan_remove();
+	while ((b = pci_find_next_bus(b)))
+		pci_rescan_bus(b);
+
+	pci_unlock_rescan_remove();
+}
+
+void t7xx_rescan_done(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
+	if (g_mtk_rescan_context.rescan_done == 0) {
+		pr_debug("this is a rescan probe\n");
+		g_mtk_rescan_context.rescan_done = 1;
+	} else {
+		pr_debug("this is a init probe\n");
+	}
+	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+}
+
+static void t7xx_remove_rescan(struct work_struct *work)
+{
+	struct pci_dev *pdev;
+	int num_retries = RESCAN_RETRIES;
+	unsigned long flags;
+
+	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
+	g_mtk_rescan_context.rescan_done = 0;
+	pdev = g_mtk_rescan_context.dev;
+	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+
+	if (pdev) {
+		pci_stop_and_remove_bus_device_locked(pdev);
+		pr_debug("start remove and rescan flow\n");
+	}
+
+	do {
+		t7xx_pci_dev_rescan();
+		spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
+		if (g_mtk_rescan_context.rescan_done) {
+			spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+		msleep(DELAY_RESCAN_MTIME);
+	} while (num_retries--);
+}
+
+void t7xx_rescan_queue_work(struct pci_dev *pdev)
+{
+	unsigned long flags;
+
+	dev_info(&pdev->dev, "start queue_mtk_rescan_work\n");
+	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
+	if (!g_mtk_rescan_context.rescan_done) {
+		dev_err(&pdev->dev, "rescan failed because last rescan undone\n");
+		spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+		return;
+	}
+
+	g_mtk_rescan_context.dev = pdev;
+	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+	queue_work(g_mtk_rescan_context.pcie_rescan_wq, &g_mtk_rescan_context.service_task);
+}
+
+int t7xx_rescan_init(void)
+{
+	spin_lock_init(&g_mtk_rescan_context.dev_lock);
+	g_mtk_rescan_context.rescan_done = 1;
+	g_mtk_rescan_context.dev = NULL;
+	g_mtk_rescan_context.pcie_rescan_wq = create_singlethread_workqueue(MTK_RESCAN_WQ);
+	if (!g_mtk_rescan_context.pcie_rescan_wq) {
+		pr_err("Failed to create workqueue: %s\n", MTK_RESCAN_WQ);
+		return -ENOMEM;
+	}
+
+	INIT_WORK(&g_mtk_rescan_context.service_task, t7xx_remove_rescan);
+
+	return 0;
+}
+
+void t7xx_rescan_deinit(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&g_mtk_rescan_context.dev_lock, flags);
+	g_mtk_rescan_context.rescan_done = 0;
+	g_mtk_rescan_context.dev = NULL;
+	spin_unlock_irqrestore(&g_mtk_rescan_context.dev_lock, flags);
+	cancel_work_sync(&g_mtk_rescan_context.service_task);
+	destroy_workqueue(g_mtk_rescan_context.pcie_rescan_wq);
+}
diff --git a/drivers/net/wwan/t7xx/t7xx_pci_rescan.h b/drivers/net/wwan/t7xx/t7xx_pci_rescan.h
new file mode 100644
index 000000000000..de4ca1363bb0
--- /dev/null
+++ b/drivers/net/wwan/t7xx/t7xx_pci_rescan.h
@@ -0,0 +1,29 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2021, MediaTek Inc.
+ * Copyright (c) 2021-2022, Intel Corporation.
+ */
+
+#ifndef __T7XX_PCI_RESCAN_H__
+#define __T7XX_PCI_RESCAN_H__
+
+#define MTK_RESCAN_WQ "mtk_rescan_wq"
+
+#define DELAY_RESCAN_MTIME 1000
+#define RESCAN_RETRIES 35
+
+struct remove_rescan_context {
+	struct work_struct	 service_task;
+	struct workqueue_struct *pcie_rescan_wq;
+	spinlock_t		dev_lock; /* protects device */
+	struct pci_dev		*dev;
+	int			rescan_done;
+};
+
+void t7xx_pci_dev_rescan(void);
+void t7xx_rescan_queue_work(struct pci_dev *pdev);
+int t7xx_rescan_init(void);
+void t7xx_rescan_deinit(void);
+void t7xx_rescan_done(void);
+
+#endif	/* __T7XX_PCI_RESCAN_H__ */
diff --git a/drivers/net/wwan/t7xx/t7xx_port_wwan.c b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
index e53651ee2005..dfd7fb487fc0 100644
--- a/drivers/net/wwan/t7xx/t7xx_port_wwan.c
+++ b/drivers/net/wwan/t7xx/t7xx_port_wwan.c
@@ -156,6 +156,12 @@  static void t7xx_port_wwan_md_state_notify(struct t7xx_port *port, unsigned int
 {
 	const struct t7xx_port_conf *port_conf = port->port_conf;
 
+	if (state == MD_STATE_EXCEPTION) {
+		if (port->wwan_port)
+			wwan_port_txoff(port->wwan_port);
+		return;
+	}
+
 	if (state != MD_STATE_READY)
 		return;
 
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
index c1789a558c9d..9c222809371b 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
@@ -35,9 +35,11 @@ 
 #include "t7xx_hif_cldma.h"
 #include "t7xx_mhccif.h"
 #include "t7xx_modem_ops.h"
+#include "t7xx_netdev.h"
 #include "t7xx_pci.h"
 #include "t7xx_pcie_mac.h"
 #include "t7xx_port_proxy.h"
+#include "t7xx_pci_rescan.h"
 #include "t7xx_reg.h"
 #include "t7xx_state_monitor.h"