diff mbox

[5/5] mwifiex: wait for firmware dump completion in remove_card

Message ID 1477318892-22877-5-git-send-email-akarwar@marvell.com (mailing list archive)
State Changes Requested
Delegated to: Kalle Valo
Headers show

Commit Message

Amitkumar Karwar Oct. 24, 2016, 2:21 p.m. UTC
From: Xinming Hu <huxm@marvell.com>

This patch ensures to wait for firmware dump completion in
mwifiex_remove_card().

For sdio interface, reset_trigger variable is used to identify
if mwifiex_sdio_remove() is called by sdio_work during reset or
the call is from sdio subsystem.

This patch fixes a kernel crash observed during reboot when firmware
dump operation is in process.

Signed-off-by: Xinming Hu <huxm@marvell.com>
Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
---
 drivers/net/wireless/marvell/mwifiex/pcie.c |  2 ++
 drivers/net/wireless/marvell/mwifiex/sdio.c | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

Comments

Brian Norris Oct. 24, 2016, 8:23 p.m. UTC | #1
On Mon, Oct 24, 2016 at 07:51:32PM +0530, Amitkumar Karwar wrote:
> From: Xinming Hu <huxm@marvell.com>
> 
> This patch ensures to wait for firmware dump completion in
> mwifiex_remove_card().
> 
> For sdio interface, reset_trigger variable is used to identify
> if mwifiex_sdio_remove() is called by sdio_work during reset or
> the call is from sdio subsystem.
> 
> This patch fixes a kernel crash observed during reboot when firmware
> dump operation is in process.
> 
> Signed-off-by: Xinming Hu <huxm@marvell.com>
> Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
> ---
>  drivers/net/wireless/marvell/mwifiex/pcie.c |  2 ++
>  drivers/net/wireless/marvell/mwifiex/sdio.c | 15 ++++++++++++++-
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
> index 986bf07..4512e86 100644
> --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
> +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
> @@ -528,6 +528,8 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev)
>  	if (!adapter || !adapter->priv_num)
>  		return;
>  
> +	cancel_work_sync(&pcie_work);

Is there a good reason you have to cancel the work? What if you were
just to finish it (i.e., flush_work())?

In any case, I think this is fine, so for the PCIe bits:

Reviewed-by: Brian Norris <briannorris@chromium.org>

> +
>  	if (user_rmmod && !adapter->mfg_mode) {
>  #ifdef CONFIG_PM_SLEEP
>  		if (adapter->is_suspended)
> diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
> index 4cad1c2..f974260 100644
> --- a/drivers/net/wireless/marvell/mwifiex/sdio.c
> +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
> @@ -46,6 +46,15 @@
>   */
>  static u8 user_rmmod;
>  
> +/* reset_trigger variable is used to identify if mwifiex_sdio_remove()
> + * is called by sdio_work during reset or the call is from sdio subsystem.
> + * We will cancel sdio_work only if the call is from sdio subsystem.
> + */
> +static u8 reset_triggered;
> +
> +static void mwifiex_sdio_work(struct work_struct *work);
> +static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
> +
>  static struct mwifiex_if_ops sdio_ops;
>  static unsigned long iface_work_flags;
>  
> @@ -289,6 +298,9 @@ mwifiex_sdio_remove(struct sdio_func *func)
>  	if (!adapter || !adapter->priv_num)
>  		return;
>  
> +	if (!reset_triggered)
> +		cancel_work_sync(&sdio_work);
> +
>  	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
>  
>  	if (user_rmmod && !adapter->mfg_mode) {
> @@ -2290,7 +2302,9 @@ static void mwifiex_recreate_adapter(struct sdio_mmc_card *card)
>  	 * discovered and initializes them from scratch.
>  	 */
>  
> +	reset_triggered = 1;
>  	mwifiex_sdio_remove(func);
> +	reset_triggered = 0;

Boy that's ugly! Couldn't you just create something like
__mwifiex_sdio_remove(), which does everything except the
cancel_work_sync()? Then you'd do this for the .remove() callback:

	cancel_work_sync(&sdio_work);
	__mwifiex_sdio_remove(func);

and for mwifiex_recreate_adapter() you'd just call
__mwifiex_sdio_remove()? The static variable that simply serves as a
(non-reentrant) function call parameter seems like a really poor
alternative to this simple refactoring.

Or you could just address the TODO in this function, and you wouldn't
have to do this dance at all...

Brian

>  
>  	/* power cycle the adapter */
>  	sdio_claim_host(func);
> @@ -2621,7 +2635,6 @@ static void mwifiex_sdio_work(struct work_struct *work)
>  		mwifiex_sdio_card_reset_work(save_adapter);
>  }
>  
> -static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
>  /* This function resets the card */
>  static void mwifiex_sdio_card_reset(struct mwifiex_adapter *adapter)
>  {
> -- 
> 1.9.1
>
Dmitry Torokhov Oct. 25, 2016, 12:14 a.m. UTC | #2
On Mon, Oct 24, 2016 at 07:51:32PM +0530, Amitkumar Karwar wrote:
> From: Xinming Hu <huxm@marvell.com>
> 
> This patch ensures to wait for firmware dump completion in
> mwifiex_remove_card().
> 
> For sdio interface, reset_trigger variable is used to identify
> if mwifiex_sdio_remove() is called by sdio_work during reset or
> the call is from sdio subsystem.
> 
> This patch fixes a kernel crash observed during reboot when firmware
> dump operation is in process.
> 
> Signed-off-by: Xinming Hu <huxm@marvell.com>
> Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
> ---
>  drivers/net/wireless/marvell/mwifiex/pcie.c |  2 ++
>  drivers/net/wireless/marvell/mwifiex/sdio.c | 15 ++++++++++++++-
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
> index 986bf07..4512e86 100644
> --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
> +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
> @@ -528,6 +528,8 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev)
>  	if (!adapter || !adapter->priv_num)
>  		return;
>  
> +	cancel_work_sync(&pcie_work);
> +
>  	if (user_rmmod && !adapter->mfg_mode) {
>  #ifdef CONFIG_PM_SLEEP
>  		if (adapter->is_suspended)
> diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
> index 4cad1c2..f974260 100644
> --- a/drivers/net/wireless/marvell/mwifiex/sdio.c
> +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
> @@ -46,6 +46,15 @@
>   */
>  static u8 user_rmmod;
>  
> +/* reset_trigger variable is used to identify if mwifiex_sdio_remove()
> + * is called by sdio_work during reset or the call is from sdio subsystem.
> + * We will cancel sdio_work only if the call is from sdio subsystem.
> + */
> +static u8 reset_triggered;

It would be really great if the driver supported multiple devices. IOW
please avoid module-globals.

> +
> +static void mwifiex_sdio_work(struct work_struct *work);
> +static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
> +
>  static struct mwifiex_if_ops sdio_ops;
>  static unsigned long iface_work_flags;
>  
> @@ -289,6 +298,9 @@ mwifiex_sdio_remove(struct sdio_func *func)
>  	if (!adapter || !adapter->priv_num)
>  		return;
>  
> +	if (!reset_triggered)
> +		cancel_work_sync(&sdio_work);
> +
>  	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
>  
>  	if (user_rmmod && !adapter->mfg_mode) {
> @@ -2290,7 +2302,9 @@ static void mwifiex_recreate_adapter(struct sdio_mmc_card *card)
>  	 * discovered and initializes them from scratch.
>  	 */
>  
> +	reset_triggered = 1;
>  	mwifiex_sdio_remove(func);
> +	reset_triggered = 0;

What exactly happens if I trigger mwifiex_sdio_remove() from sysfs at
the same time this code is running?

>  
>  	/* power cycle the adapter */
>  	sdio_claim_host(func);
> @@ -2621,7 +2635,6 @@ static void mwifiex_sdio_work(struct work_struct *work)
>  		mwifiex_sdio_card_reset_work(save_adapter);
>  }
>  
> -static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
>  /* This function resets the card */
>  static void mwifiex_sdio_card_reset(struct mwifiex_adapter *adapter)
>  {
> -- 
> 1.9.1
>
Amitkumar Karwar Oct. 25, 2016, 4:20 p.m. UTC | #3
Hi Dmitry,

> From: Dmitry Torokhov [mailto:dmitry.torokhov@gmail.com]
> Sent: Tuesday, October 25, 2016 5:44 AM
> To: Amitkumar Karwar
> Cc: linux-wireless@vger.kernel.org; Cathy Luo; Nishant Sarmukadam;
> briannorris@google.com; Xinming Hu
> Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> remove_card
> 
> On Mon, Oct 24, 2016 at 07:51:32PM +0530, Amitkumar Karwar wrote:
> > From: Xinming Hu <huxm@marvell.com>
> >
> > This patch ensures to wait for firmware dump completion in
> > mwifiex_remove_card().
> >
> > For sdio interface, reset_trigger variable is used to identify if
> > mwifiex_sdio_remove() is called by sdio_work during reset or the call
> > is from sdio subsystem.
> >
> > This patch fixes a kernel crash observed during reboot when firmware
> > dump operation is in process.
> >
> > Signed-off-by: Xinming Hu <huxm@marvell.com>
> > Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
> > ---
> >  drivers/net/wireless/marvell/mwifiex/pcie.c |  2 ++
> > drivers/net/wireless/marvell/mwifiex/sdio.c | 15 ++++++++++++++-
> >  2 files changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c
> > b/drivers/net/wireless/marvell/mwifiex/pcie.c
> > index 986bf07..4512e86 100644
> > --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
> > +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
> > @@ -528,6 +528,8 @@ static void mwifiex_pcie_remove(struct pci_dev
> *pdev)
> >  	if (!adapter || !adapter->priv_num)
> >  		return;
> >
> > +	cancel_work_sync(&pcie_work);
> > +
> >  	if (user_rmmod && !adapter->mfg_mode) {  #ifdef CONFIG_PM_SLEEP
> >  		if (adapter->is_suspended)
> > diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c
> > b/drivers/net/wireless/marvell/mwifiex/sdio.c
> > index 4cad1c2..f974260 100644
> > --- a/drivers/net/wireless/marvell/mwifiex/sdio.c
> > +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
> > @@ -46,6 +46,15 @@
> >   */
> >  static u8 user_rmmod;
> >
> > +/* reset_trigger variable is used to identify if
> > +mwifiex_sdio_remove()
> > + * is called by sdio_work during reset or the call is from sdio
> subsystem.
> > + * We will cancel sdio_work only if the call is from sdio subsystem.
> > + */
> > +static u8 reset_triggered;
> 
> It would be really great if the driver supported multiple devices. IOW
> please avoid module-globals.

You are right. As Brian pointed out in other email, I will refactor the code and get rid of global variable.

> 
> > +
> > +static void mwifiex_sdio_work(struct work_struct *work); static
> > +DECLARE_WORK(sdio_work, mwifiex_sdio_work);
> > +
> >  static struct mwifiex_if_ops sdio_ops;  static unsigned long
> > iface_work_flags;
> >
> > @@ -289,6 +298,9 @@ mwifiex_sdio_remove(struct sdio_func *func)
> >  	if (!adapter || !adapter->priv_num)
> >  		return;
> >
> > +	if (!reset_triggered)
> > +		cancel_work_sync(&sdio_work);
> > +
> >  	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
> >
> >  	if (user_rmmod && !adapter->mfg_mode) { @@ -2290,7 +2302,9 @@
> static
> > void mwifiex_recreate_adapter(struct sdio_mmc_card *card)
> >  	 * discovered and initializes them from scratch.
> >  	 */
> >
> > +	reset_triggered = 1;
> >  	mwifiex_sdio_remove(func);
> > +	reset_triggered = 0;
> 
> What exactly happens if I trigger mwifiex_sdio_remove() from sysfs at
> the same time this code is running?

Yes. There would be a race. It will be avoided with Brian's code refactoring approach.

Regards,
Amitkumar
Amitkumar Karwar Oct. 25, 2016, 4:30 p.m. UTC | #4
Hi Brian,

> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Tuesday, October 25, 2016 1:54 AM
> To: Amitkumar Karwar
> Cc: linux-wireless@vger.kernel.org; Cathy Luo; Nishant Sarmukadam;
> dmitry.torokhov@gmail.com; Xinming Hu
> Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> remove_card
> 
> On Mon, Oct 24, 2016 at 07:51:32PM +0530, Amitkumar Karwar wrote:
> > From: Xinming Hu <huxm@marvell.com>
> >
> > This patch ensures to wait for firmware dump completion in
> > mwifiex_remove_card().
> >
> > For sdio interface, reset_trigger variable is used to identify if
> > mwifiex_sdio_remove() is called by sdio_work during reset or the call
> > is from sdio subsystem.
> >
> > This patch fixes a kernel crash observed during reboot when firmware
> > dump operation is in process.
> >
> > Signed-off-by: Xinming Hu <huxm@marvell.com>
> > Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
> > ---
> >  drivers/net/wireless/marvell/mwifiex/pcie.c |  2 ++
> > drivers/net/wireless/marvell/mwifiex/sdio.c | 15 ++++++++++++++-
> >  2 files changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c
> > b/drivers/net/wireless/marvell/mwifiex/pcie.c
> > index 986bf07..4512e86 100644
> > --- a/drivers/net/wireless/marvell/mwifiex/pcie.c
> > +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
> > @@ -528,6 +528,8 @@ static void mwifiex_pcie_remove(struct pci_dev
> *pdev)
> >  	if (!adapter || !adapter->priv_num)
> >  		return;
> >
> > +	cancel_work_sync(&pcie_work);
> 
> Is there a good reason you have to cancel the work? What if you were
> just to finish it (i.e., flush_work())?

I assume if work is running, cancel_work_sync() will wait until completion. Otherwise it will cancel queued work. Firmware dump takes 20-30 seconds to complete. I think, it's ok to cancel it if work is just queued and not running yet. Reboot taking significant time due to our wait in remove handler may not be a good experience from end user point of view.

If you prefer flush_work(), I can use the same.

> 
> In any case, I think this is fine, so for the PCIe bits:
> 
> Reviewed-by: Brian Norris <briannorris@chromium.org>
> 
> > +
> >  	if (user_rmmod && !adapter->mfg_mode) {  #ifdef CONFIG_PM_SLEEP
> >  		if (adapter->is_suspended)
> > diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c
> > b/drivers/net/wireless/marvell/mwifiex/sdio.c
> > index 4cad1c2..f974260 100644
> > --- a/drivers/net/wireless/marvell/mwifiex/sdio.c
> > +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
> > @@ -46,6 +46,15 @@
> >   */
> >  static u8 user_rmmod;
> >
> > +/* reset_trigger variable is used to identify if
> > +mwifiex_sdio_remove()
> > + * is called by sdio_work during reset or the call is from sdio
> subsystem.
> > + * We will cancel sdio_work only if the call is from sdio subsystem.
> > + */
> > +static u8 reset_triggered;
> > +
> > +static void mwifiex_sdio_work(struct work_struct *work); static
> > +DECLARE_WORK(sdio_work, mwifiex_sdio_work);
> > +
> >  static struct mwifiex_if_ops sdio_ops;  static unsigned long
> > iface_work_flags;
> >
> > @@ -289,6 +298,9 @@ mwifiex_sdio_remove(struct sdio_func *func)
> >  	if (!adapter || !adapter->priv_num)
> >  		return;
> >
> > +	if (!reset_triggered)
> > +		cancel_work_sync(&sdio_work);
> > +
> >  	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
> >
> >  	if (user_rmmod && !adapter->mfg_mode) { @@ -2290,7 +2302,9 @@
> static
> > void mwifiex_recreate_adapter(struct sdio_mmc_card *card)
> >  	 * discovered and initializes them from scratch.
> >  	 */
> >
> > +	reset_triggered = 1;
> >  	mwifiex_sdio_remove(func);
> > +	reset_triggered = 0;
> 
> Boy that's ugly! Couldn't you just create something like
> __mwifiex_sdio_remove(), which does everything except the
> cancel_work_sync()? Then you'd do this for the .remove() callback:
> 
> 	cancel_work_sync(&sdio_work);
> 	__mwifiex_sdio_remove(func);
> 
> and for mwifiex_recreate_adapter() you'd just call
> __mwifiex_sdio_remove()? The static variable that simply serves as a
> (non-reentrant) function call parameter seems like a really poor
> alternative to this simple refactoring.

Thanks a lot for the suggestion.
I will use this approach in updated version.

Regards,
Amitkumar
Kalle Valo Oct. 27, 2016, 1:20 p.m. UTC | #5
Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:

>> +/* reset_trigger variable is used to identify if mwifiex_sdio_remove()
>> + * is called by sdio_work during reset or the call is from sdio subsystem.
>> + * We will cancel sdio_work only if the call is from sdio subsystem.
>> + */
>> +static u8 reset_triggered;
>
> It would be really great if the driver supported multiple devices. IOW
> please avoid module-globals.

Good catch, it's a hard requirement to support multiple devices at the
same time.
Brian Norris Nov. 2, 2016, 8:45 p.m. UTC | #6
On Thu, Oct 27, 2016 at 04:20:25PM +0300, Kalle Valo wrote:
> Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:
> 
> >> +/* reset_trigger variable is used to identify if mwifiex_sdio_remove()
> >> + * is called by sdio_work during reset or the call is from sdio subsystem.
> >> + * We will cancel sdio_work only if the call is from sdio subsystem.
> >> + */
> >> +static u8 reset_triggered;
> >
> > It would be really great if the driver supported multiple devices. IOW
> > please avoid module-globals.
> 
> Good catch, it's a hard requirement to support multiple devices at the
> same time.

BTW, this problem is repeated in several places throughout this driver.
For instance, look for 'user_rmmod' (why? you shouldn't need to treat
module unload differently...) and the work structs (and corresponding
'saved_adapter' and 'iface_flags') used for PCIe function-level reset
and SDIO reset.

Hopefully either Marvell's or my cleanups can move to get rid of these
anti-patterns soon...

Brian
Amitkumar Karwar Nov. 9, 2016, 12:35 p.m. UTC | #7
Hi Brian,

> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Thursday, November 03, 2016 2:16 AM
> To: Kalle Valo
> Cc: Dmitry Torokhov; Amitkumar Karwar; linux-wireless@vger.kernel.org;
> Cathy Luo; Nishant Sarmukadam; Xinming Hu
> Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> remove_card
> 
> On Thu, Oct 27, 2016 at 04:20:25PM +0300, Kalle Valo wrote:
> > Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:
> >
> > >> +/* reset_trigger variable is used to identify if
> > >> +mwifiex_sdio_remove()
> > >> + * is called by sdio_work during reset or the call is from sdio
> subsystem.
> > >> + * We will cancel sdio_work only if the call is from sdio
> subsystem.
> > >> + */
> > >> +static u8 reset_triggered;
> > >
> > > It would be really great if the driver supported multiple devices.
> > > IOW please avoid module-globals.
> >
> > Good catch, it's a hard requirement to support multiple devices at the
> > same time.
> 
> BTW, this problem is repeated in several places throughout this driver.
> For instance, look for 'user_rmmod' (why? you shouldn't need to treat
> module unload differently...)

We have 2 kinds of teardown cases.
1) Chip is going to be powered off.
	a) System reboot
	b) Someone manually removed wifi card from system
2) User unloaded the driver.

In case 1. b), we can't have logic to terminate WIFI connection and download SHUTDOWN command to firmware, as hardware itself is not present.

This logic is useful when user just unloads and loads the driver. Firmware download will be skipped in this case, as it's already running. SHUTDOWN command sent during unload has cleared firmware's state. 

'user_rmmod' flag doesn't create problem for supporting multiple devices. The flag is true during module unload OR reboot. It's applicable for all devices.

> and the work structs (and corresponding
> 'saved_adapter' and 'iface_flags') used for PCIe function-level reset
> and SDIO reset.

We are working on the v3 of this patch series. We will try to get rid of these variables along with global "work_struct" as you suggested.

Regards,
Amitkumar
Brian Norris Nov. 9, 2016, 8:37 p.m. UTC | #8
On Wed, Nov 09, 2016 at 12:35:22PM +0000, Amitkumar Karwar wrote:
> Hi Brian,

Hi,

> > From: Brian Norris [mailto:briannorris@chromium.org]
> > Sent: Thursday, November 03, 2016 2:16 AM
> > To: Kalle Valo
> > Cc: Dmitry Torokhov; Amitkumar Karwar; linux-wireless@vger.kernel.org;
> > Cathy Luo; Nishant Sarmukadam; Xinming Hu
> > Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> > remove_card
> > 
> > On Thu, Oct 27, 2016 at 04:20:25PM +0300, Kalle Valo wrote:
> > > Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:
> > >
> > > >> +/* reset_trigger variable is used to identify if
> > > >> +mwifiex_sdio_remove()
> > > >> + * is called by sdio_work during reset or the call is from sdio
> > subsystem.
> > > >> + * We will cancel sdio_work only if the call is from sdio
> > subsystem.
> > > >> + */
> > > >> +static u8 reset_triggered;
> > > >
> > > > It would be really great if the driver supported multiple devices.
> > > > IOW please avoid module-globals.
> > >
> > > Good catch, it's a hard requirement to support multiple devices at the
> > > same time.
> > 
> > BTW, this problem is repeated in several places throughout this driver.
> > For instance, look for 'user_rmmod' (why? you shouldn't need to treat
> > module unload differently...)
> 
> We have 2 kinds of teardown cases.
> 1) Chip is going to be powered off.
> 	a) System reboot
> 	b) Someone manually removed wifi card from system
> 2) User unloaded the driver.
> 
> In case 1. b), we can't have logic to terminate WIFI connection and
> download SHUTDOWN command to firmware, as hardware itself is not
> present.

That seems like a poor way of determining the difference between hot
unplug and clean shutdown. What if unplug happens concurrently with
rmmod? Seems like it'd be better to identify hardware failures as such,
and just skip talking to HW. Also, for interfaces that support unplug
well (like USB), shouldn't you be able to get hotplug info from the
driver core, instead of having to guess the inverse of that ("this was
not a hotplug") from static variables like that?

> This logic is useful when user just unloads and loads the driver.
> Firmware download will be skipped in this case, as it's already
> running. SHUTDOWN command sent during unload has cleared firmware's
> state. 

Why is that useful?

> 'user_rmmod' flag doesn't create problem for supporting multiple
> devices. The flag is true during module unload OR reboot. It's
> applicable for all devices.
> 
> > and the work structs (and corresponding
> > 'saved_adapter' and 'iface_flags') used for PCIe function-level reset
> > and SDIO reset.
> 
> We are working on the v3 of this patch series. We will try to get rid
> of these variables along with global "work_struct" as you suggested.

Good, thanks.

Brian
Amitkumar Karwar Nov. 10, 2016, 10:01 a.m. UTC | #9
Hi Brian,

> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Thursday, November 10, 2016 2:07 AM
> To: Amitkumar Karwar
> Cc: Kalle Valo; Dmitry Torokhov; linux-wireless@vger.kernel.org; Cathy
> Luo; Nishant Sarmukadam; Xinming Hu
> Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> remove_card
> 
> On Wed, Nov 09, 2016 at 12:35:22PM +0000, Amitkumar Karwar wrote:
> > Hi Brian,
> 
> Hi,
> 
> > > From: Brian Norris [mailto:briannorris@chromium.org]
> > > Sent: Thursday, November 03, 2016 2:16 AM
> > > To: Kalle Valo
> > > Cc: Dmitry Torokhov; Amitkumar Karwar;
> > > linux-wireless@vger.kernel.org; Cathy Luo; Nishant Sarmukadam;
> > > Xinming Hu
> > > Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion
> > > in remove_card
> > >
> > > On Thu, Oct 27, 2016 at 04:20:25PM +0300, Kalle Valo wrote:
> > > > Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:
> > > >
> > > > >> +/* reset_trigger variable is used to identify if
> > > > >> +mwifiex_sdio_remove()
> > > > >> + * is called by sdio_work during reset or the call is from
> > > > >> +sdio
> > > subsystem.
> > > > >> + * We will cancel sdio_work only if the call is from sdio
> > > subsystem.
> > > > >> + */
> > > > >> +static u8 reset_triggered;
> > > > >
> > > > > It would be really great if the driver supported multiple
> devices.
> > > > > IOW please avoid module-globals.
> > > >
> > > > Good catch, it's a hard requirement to support multiple devices at
> > > > the same time.
> > >
> > > BTW, this problem is repeated in several places throughout this
> driver.
> > > For instance, look for 'user_rmmod' (why? you shouldn't need to
> > > treat module unload differently...)
> >
> > We have 2 kinds of teardown cases.
> > 1) Chip is going to be powered off.
> > 	a) System reboot
> > 	b) Someone manually removed wifi card from system
> > 2) User unloaded the driver.
> >
> > In case 1. b), we can't have logic to terminate WIFI connection and
> > download SHUTDOWN command to firmware, as hardware itself is not
> > present.
> 
> That seems like a poor way of determining the difference between hot
> unplug and clean shutdown. What if unplug happens concurrently with
> rmmod?

I agree. hotunplug thread may read the flag as "true" in this race and try to interact with hardware.

> Seems like it'd be better to identify hardware failures as such,
> and just skip talking to HW. Also, for interfaces that support unplug
> well (like USB), shouldn't you be able to get hotplug info from the
> driver core, instead of having to guess the inverse of that ("this was
> not a hotplug") from static variables like that?

You are right. We can identify hardware read/write failure and continue performing remaining cleanup.
This way "user_rmmod" flag can be avoided.
I will plan for this cleanup. Tests need to be performed with SDIO, PCIe use USB devices.
I just found hotunplug info is received in case of USB.(udev->state changed to USB_STATE_NOTATTACHED). We will make use of this.

> 
> > This logic is useful when user just unloads and loads the driver.
> > Firmware download will be skipped in this case, as it's already
> > running. SHUTDOWN command sent during unload has cleared firmware's
> > state.
> 
> Why is that useful?

Sending SHUTDOWN command helps firmware reset/clean it's state. As per our protocol, SHUTDOWN is the last command downloaded to firmware. After this, firmware's state would be same as freshly downloaded firmware.
Next time when driver is loaded, firmware download is skipped, as it's already active. Driver just need to send couple of initialization commands.

Regards,
Amitkumar
Amitkumar Karwar Nov. 16, 2016, 1:07 p.m. UTC | #10
Hi Brian,

> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Thursday, November 10, 2016 2:07 AM
> To: Amitkumar Karwar
> Cc: Kalle Valo; Dmitry Torokhov; linux-wireless@vger.kernel.org; Cathy
> Luo; Nishant Sarmukadam; Xinming Hu
> Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion in
> remove_card
> 
> On Wed, Nov 09, 2016 at 12:35:22PM +0000, Amitkumar Karwar wrote:
> > Hi Brian,
> 
> Hi,
> 
> > > From: Brian Norris [mailto:briannorris@chromium.org]
> > > Sent: Thursday, November 03, 2016 2:16 AM
> > > To: Kalle Valo
> > > Cc: Dmitry Torokhov; Amitkumar Karwar;
> > > linux-wireless@vger.kernel.org; Cathy Luo; Nishant Sarmukadam;
> > > Xinming Hu
> > > Subject: Re: [PATCH 5/5] mwifiex: wait for firmware dump completion
> > > in remove_card
> > >
> > > On Thu, Oct 27, 2016 at 04:20:25PM +0300, Kalle Valo wrote:
> > > > Dmitry Torokhov <dmitry.torokhov@gmail.com> writes:
> > > >
> > > > >> +/* reset_trigger variable is used to identify if
> > > > >> +mwifiex_sdio_remove()
> > > > >> + * is called by sdio_work during reset or the call is from
> > > > >> +sdio
> > > subsystem.
> > > > >> + * We will cancel sdio_work only if the call is from sdio
> > > subsystem.
> > > > >> + */
> > > > >> +static u8 reset_triggered;
> > > > >
> > > > > It would be really great if the driver supported multiple
> devices.
> > > > > IOW please avoid module-globals.
> > > >
> > > > Good catch, it's a hard requirement to support multiple devices
> at
> > > > the same time.
> > >
> > > BTW, this problem is repeated in several places throughout this
> driver.
> > > For instance, look for 'user_rmmod' (why? you shouldn't need to
> > > treat module unload differently...)
> >
> > We have 2 kinds of teardown cases.
> > 1) Chip is going to be powered off.
> > 	a) System reboot
> > 	b) Someone manually removed wifi card from system
> > 2) User unloaded the driver.
> >
> > In case 1. b), we can't have logic to terminate WIFI connection and
> > download SHUTDOWN command to firmware, as hardware itself is not
> > present.
> 
> That seems like a poor way of determining the difference between hot
> unplug and clean shutdown. What if unplug happens concurrently with
> rmmod? Seems like it'd be better to identify hardware failures as such,
> and just skip talking to HW. Also, for interfaces that support unplug
> well (like USB), shouldn't you be able to get hotplug info from the
> driver core, instead of having to guess the inverse of that ("this was
> not a hotplug") from static variables like that?
> 
> > This logic is useful when user just unloads and loads the driver.
> > Firmware download will be skipped in this case, as it's already
> > running. SHUTDOWN command sent during unload has cleared firmware's
> > state.
> 
> Why is that useful?
> 
> > 'user_rmmod' flag doesn't create problem for supporting multiple
> > devices. The flag is true during module unload OR reboot. It's
> > applicable for all devices.
> >
> > > and the work structs (and corresponding 'saved_adapter' and
> > > 'iface_flags') used for PCIe function-level reset and SDIO reset.
> >
> > We are working on the v3 of this patch series. We will try to get rid
> > of these variables along with global "work_struct" as you suggested.

I observed some crash issues while testing with PCIe/SDIO chipsets after removing user_rmmod. We are still checking them. I will not include user_rmmod removal patch in v3 series. Other pcie_work related global variables are removed in v3 series.
Card is freed and recreated during mwifiex_sdio_card_reset_work(). It is one of the activities in sdio_work. So moving sdio_work inside card won't be straight forward.

Regards,
Amitkumar
Brian Norris Nov. 16, 2016, 6:58 p.m. UTC | #11
On Wed, Nov 16, 2016 at 01:07:31PM +0000, Amitkumar Karwar wrote:
> I observed some crash issues while testing with PCIe/SDIO chipsets
> after removing user_rmmod. We are still checking them. I will not
> include user_rmmod removal patch in v3 series. Other pcie_work related
> global variables are removed in v3 series.

Sounds fine to me.

> Card is freed and recreated during mwifiex_sdio_card_reset_work(). It
> is one of the activities in sdio_work. So moving sdio_work inside card
> won't be straight forward.

That SDIO reset code is really crappy anyway, so this deserves some more
attention eventually. For instance, why does PCIe FLR have completely
different code structure? Also, the 'card' should really not be
reallocated every time. But anyway, that's not something to solve
immediately.

Brian
diff mbox

Patch

diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index 986bf07..4512e86 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -528,6 +528,8 @@  static void mwifiex_pcie_remove(struct pci_dev *pdev)
 	if (!adapter || !adapter->priv_num)
 		return;
 
+	cancel_work_sync(&pcie_work);
+
 	if (user_rmmod && !adapter->mfg_mode) {
 #ifdef CONFIG_PM_SLEEP
 		if (adapter->is_suspended)
diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
index 4cad1c2..f974260 100644
--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
+++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
@@ -46,6 +46,15 @@ 
  */
 static u8 user_rmmod;
 
+/* reset_trigger variable is used to identify if mwifiex_sdio_remove()
+ * is called by sdio_work during reset or the call is from sdio subsystem.
+ * We will cancel sdio_work only if the call is from sdio subsystem.
+ */
+static u8 reset_triggered;
+
+static void mwifiex_sdio_work(struct work_struct *work);
+static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
+
 static struct mwifiex_if_ops sdio_ops;
 static unsigned long iface_work_flags;
 
@@ -289,6 +298,9 @@  mwifiex_sdio_remove(struct sdio_func *func)
 	if (!adapter || !adapter->priv_num)
 		return;
 
+	if (!reset_triggered)
+		cancel_work_sync(&sdio_work);
+
 	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
 
 	if (user_rmmod && !adapter->mfg_mode) {
@@ -2290,7 +2302,9 @@  static void mwifiex_recreate_adapter(struct sdio_mmc_card *card)
 	 * discovered and initializes them from scratch.
 	 */
 
+	reset_triggered = 1;
 	mwifiex_sdio_remove(func);
+	reset_triggered = 0;
 
 	/* power cycle the adapter */
 	sdio_claim_host(func);
@@ -2621,7 +2635,6 @@  static void mwifiex_sdio_work(struct work_struct *work)
 		mwifiex_sdio_card_reset_work(save_adapter);
 }
 
-static DECLARE_WORK(sdio_work, mwifiex_sdio_work);
 /* This function resets the card */
 static void mwifiex_sdio_card_reset(struct mwifiex_adapter *adapter)
 {