diff mbox series

[net,v4,1/7] octeon_ep: Add checks to fix double free crashes

Message ID 20241113111319.1156507-2-srasheed@marvell.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Double free fixes and NULL pointer checks | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 20 this patch: 20
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2024-11-14--00-00 (tests: 786)

Commit Message

Shinas Rasheed Nov. 13, 2024, 11:13 a.m. UTC
From: Vimlesh Kumar <vimleshk@marvell.com>

Add required checks to avoid double free. Crashes were
observed due to the same on reset scenarios, when reset
was tried multiple times, and the first reset had torn
down resources already.

Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
---
V4:
  - Removed unnecessary protective code. Added fast return from
    octep_free_irqs()
  - Improved commit message

V3: https://lore.kernel.org/all/20241108074543.1123036-2-srasheed@marvell.com/
  - Added back "Fixes" to the changelist

V2: https://lore.kernel.org/all/20241107132846.1118835-2-srasheed@marvell.com/
  - No changes

V1: https://lore.kernel.org/all/20241101103416.1064930-2-srasheed@marvell.com/

 .../net/ethernet/marvell/octeon_ep/octep_main.c    | 14 ++++++++++----
 drivers/net/ethernet/marvell/octeon_ep/octep_tx.c  |  2 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

Comments

Vadim Fedorenko Nov. 13, 2024, 2:59 p.m. UTC | #1
On 13/11/2024 11:13, Shinas Rasheed wrote:
> From: Vimlesh Kumar <vimleshk@marvell.com>
> 
> Add required checks to avoid double free. Crashes were
> observed due to the same on reset scenarios, when reset
> was tried multiple times, and the first reset had torn
> down resources already.
> 
> Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support")
> Signed-off-by: Vimlesh Kumar <vimleshk@marvell.com>
> Signed-off-by: Shinas Rasheed <srasheed@marvell.com>
> ---
> V4:
>    - Removed unnecessary protective code. Added fast return from
>      octep_free_irqs()
>    - Improved commit message
> 
> V3: https://lore.kernel.org/all/20241108074543.1123036-2-srasheed@marvell.com/
>    - Added back "Fixes" to the changelist
> 
> V2: https://lore.kernel.org/all/20241107132846.1118835-2-srasheed@marvell.com/
>    - No changes
> 
> V1: https://lore.kernel.org/all/20241101103416.1064930-2-srasheed@marvell.com/
> 
>   .../net/ethernet/marvell/octeon_ep/octep_main.c    | 14 ++++++++++----
>   drivers/net/ethernet/marvell/octeon_ep/octep_tx.c  |  2 ++
>   2 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> index 549436efc204..29796544feb6 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> @@ -496,6 +496,9 @@ static void octep_free_irqs(struct octep_device *oct)
>   {
>   	int i;
>   
> +	if (!oct->msix_entries)
> +		return;
> +
>   	/* First few MSI-X interrupts are non queue interrupts; free them */
>   	for (i = 0; i < CFG_GET_NON_IOQ_MSIX(oct->conf); i++)
>   		free_irq(oct->msix_entries[i].vector, oct);
> @@ -505,7 +508,7 @@ static void octep_free_irqs(struct octep_device *oct)
>   	for (i = CFG_GET_NON_IOQ_MSIX(oct->conf); i < oct->num_irqs; i++) {
>   		irq_set_affinity_hint(oct->msix_entries[i].vector, NULL);
>   		free_irq(oct->msix_entries[i].vector,
> -			 oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]);
> +				oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]);

Looks like this chunk was unintended, it was perfectly aligned before...

>   	}
>   	netdev_info(oct->netdev, "IRQs freed\n");
>   }
> @@ -635,8 +638,10 @@ static void octep_napi_delete(struct octep_device *oct)
>   
>   	for (i = 0; i < oct->num_oqs; i++) {
>   		netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i);
> -		netif_napi_del(&oct->ioq_vector[i]->napi);
> -		oct->oq[i]->napi = NULL;
> +		if (oct->oq[i]->napi) {
> +			netif_napi_del(&oct->ioq_vector[i]->napi);
> +			oct->oq[i]->napi = NULL;
> +		}

I would just add

if (!oct->oq[i]->napi)
	continue

as the first line within the loop. Otherwise debug message could be
misleading - deleting NAPI on queue which doesn't exist.

>   	}
>   }
>   
> @@ -666,7 +671,8 @@ static void octep_napi_disable(struct octep_device *oct)
>   
>   	for (i = 0; i < oct->num_oqs; i++) {
>   		netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i);
> -		napi_disable(&oct->ioq_vector[i]->napi);
> +		if (oct->oq[i]->napi)
> +			napi_disable(&oct->ioq_vector[i]->napi);

And here again, the same pattern.

>   	}
>   }
>   
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
> index 06851b78aa28..157bf489ae19 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
> @@ -323,6 +323,8 @@ void octep_free_iqs(struct octep_device *oct)
>   	int i;
>   
>   	for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
> +		if (!oct->iq[i])
> +			continue;
>   		octep_free_iq(oct->iq[i]);
>   		dev_dbg(&oct->pdev->dev,
>   			"Successfully destroyed IQ(TxQ)-%d.\n", i);
Vadim Fedorenko Nov. 13, 2024, 10:38 p.m. UTC | #2
On 13/11/2024 11:13, Shinas Rasheed wrote:
> From: Vimlesh Kumar <vimleshk@marvell.com>
> 
> Add required checks to avoid double free. Crashes were
> observed due to the same on reset scenarios, when reset
> was tried multiple times, and the first reset had torn
> down resources already.

I'm looking at the whole series and it feels like we have to deal
with the root cause rather than add protective code left and right.

The driver may potentially have some locks missing which will cause
missing resources, and to fix the root cause these locks have to be
added. WDYT?
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 549436efc204..29796544feb6 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -496,6 +496,9 @@  static void octep_free_irqs(struct octep_device *oct)
 {
 	int i;
 
+	if (!oct->msix_entries)
+		return;
+
 	/* First few MSI-X interrupts are non queue interrupts; free them */
 	for (i = 0; i < CFG_GET_NON_IOQ_MSIX(oct->conf); i++)
 		free_irq(oct->msix_entries[i].vector, oct);
@@ -505,7 +508,7 @@  static void octep_free_irqs(struct octep_device *oct)
 	for (i = CFG_GET_NON_IOQ_MSIX(oct->conf); i < oct->num_irqs; i++) {
 		irq_set_affinity_hint(oct->msix_entries[i].vector, NULL);
 		free_irq(oct->msix_entries[i].vector,
-			 oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]);
+				oct->ioq_vector[i - CFG_GET_NON_IOQ_MSIX(oct->conf)]);
 	}
 	netdev_info(oct->netdev, "IRQs freed\n");
 }
@@ -635,8 +638,10 @@  static void octep_napi_delete(struct octep_device *oct)
 
 	for (i = 0; i < oct->num_oqs; i++) {
 		netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i);
-		netif_napi_del(&oct->ioq_vector[i]->napi);
-		oct->oq[i]->napi = NULL;
+		if (oct->oq[i]->napi) {
+			netif_napi_del(&oct->ioq_vector[i]->napi);
+			oct->oq[i]->napi = NULL;
+		}
 	}
 }
 
@@ -666,7 +671,8 @@  static void octep_napi_disable(struct octep_device *oct)
 
 	for (i = 0; i < oct->num_oqs; i++) {
 		netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i);
-		napi_disable(&oct->ioq_vector[i]->napi);
+		if (oct->oq[i]->napi)
+			napi_disable(&oct->ioq_vector[i]->napi);
 	}
 }
 
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
index 06851b78aa28..157bf489ae19 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -323,6 +323,8 @@  void octep_free_iqs(struct octep_device *oct)
 	int i;
 
 	for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) {
+		if (!oct->iq[i])
+			continue;
 		octep_free_iq(oct->iq[i]);
 		dev_dbg(&oct->pdev->dev,
 			"Successfully destroyed IQ(TxQ)-%d.\n", i);