diff mbox series

[net,1/1] ibmvnic: serialize access to work queue on remove

Message ID 20210213044250.960317-1-sukadev@linux.ibm.com (mailing list archive)
State Accepted
Commit 4a41c421f3676fdeea91733cf434dcf319c4c351
Delegated to: Netdev Maintainers
Headers show
Series [net,1/1] ibmvnic: serialize access to work queue on remove | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/cc_maintainers fail 2 blamed authors not CCed: tlfalcon@linux.ibm.com davem@davemloft.net; 7 maintainers not CCed: tlfalcon@linux.ibm.com mpe@ellerman.id.au paulus@samba.org kuba@kernel.org davem@davemloft.net benh@kernel.crashing.org linuxppc-dev@lists.ozlabs.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 19 this patch: 19
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: Use a single space after Cc:
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Sukadev Bhattiprolu Feb. 13, 2021, 4:42 a.m. UTC
The work queue is used to queue reset requests like CHANGE-PARAM or
FAILOVER resets for the worker thread. When the adapter is being removed
the adapter state is set to VNIC_REMOVING and the work queue is flushed
so no new work is added. However the check for adapter being removed is
racy in that the adapter can go into REMOVING state just after we check
and we might end up adding work just as it is being flushed (or after).

The ->rwi_lock is already being used to serialize queue/dequeue work.
Extend its usage to ensure there is no race when scheduling/flushing work.

Fixes: 6954a9e4192b ("ibmvnic: Flush existing work items before device removal")
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
Cc: Uwe Kleine-König <uwe@kleine-koenig.org>
Cc: Saeed Mahameed <saeed@kernel.org>
---
Changelog
	An earlier version was reviewed by Saeed Mahameed. But I have deferred
	some earlier patches in that set. Also, now extend the use of ->rwi_lock
	rather than defining a new lock.
---
 drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++-------
 drivers/net/ethernet/ibm/ibmvnic.h |  5 ++++-
 2 files changed, 24 insertions(+), 8 deletions(-)

Comments

Dany Madden Feb. 13, 2021, 4:17 p.m. UTC | #1
On 2021-02-12 20:42, Sukadev Bhattiprolu wrote:
> The work queue is used to queue reset requests like CHANGE-PARAM or
> FAILOVER resets for the worker thread. When the adapter is being 
> removed
> the adapter state is set to VNIC_REMOVING and the work queue is flushed
> so no new work is added. However the check for adapter being removed is
> racy in that the adapter can go into REMOVING state just after we check
> and we might end up adding work just as it is being flushed (or after).
> 
> The ->rwi_lock is already being used to serialize queue/dequeue work.
> Extend its usage ensure there is no race when scheduling/flushing work.
> 
> Fixes: 6954a9e4192b ("ibmvnic: Flush existing work items before device 
> removal")
> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>

Reviewed-by: Dany Madden <drt@linux.ibm.com>

> Cc:Uwe Kleine-König <uwe@kleine-koenig.org>
> Cc:Saeed Mahameed <saeed@kernel.org>
> ---
> Changelog
> 	An earlier version was reviewed by Saeed Mahmeed. But I have deferred
> 	some earlier patches in that set. Also, now extend the use of 
> ->rwi_lock
> 	rather than defining a new lock.
> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++-------
>  drivers/net/ethernet/ibm/ibmvnic.h |  5 ++++-
>  2 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index ce6b1cb0b0f9..004565b18a03 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2442,6 +2442,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter 
> *adapter,
>  	unsigned long flags;
>  	int ret;
> 
> +	spin_lock_irqsave(&adapter->rwi_lock, flags);
> +
>  	/*
>  	 * If failover is pending don't schedule any other reset.
>  	 * Instead let the failover complete. If there is already a
> @@ -2462,14 +2464,11 @@ static int ibmvnic_reset(struct
> ibmvnic_adapter *adapter,
>  		goto err;
>  	}
> 
> -	spin_lock_irqsave(&adapter->rwi_lock, flags);
> -
>  	list_for_each(entry, &adapter->rwi_list) {
>  		tmp = list_entry(entry, struct ibmvnic_rwi, list);
>  		if (tmp->reset_reason == reason) {
>  			netdev_dbg(netdev, "Skipping matching reset, reason=%d\n",
>  				   reason);
> -			spin_unlock_irqrestore(&adapter->rwi_lock, flags);
>  			ret = EBUSY;
>  			goto err;
>  		}
> @@ -2477,8 +2476,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter 
> *adapter,
> 
>  	rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
>  	if (!rwi) {
> -		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
> -		ibmvnic_close(netdev);
>  		ret = ENOMEM;
>  		goto err;
>  	}
> @@ -2491,12 +2488,17 @@ static int ibmvnic_reset(struct
> ibmvnic_adapter *adapter,
>  	}
>  	rwi->reset_reason = reason;
>  	list_add_tail(&rwi->list, &adapter->rwi_list);
> -	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
>  	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", 
> reason);
>  	schedule_work(&adapter->ibmvnic_reset);
> 
> -	return 0;
> +	ret = 0;
>  err:
> +	/* ibmvnic_close() below can block, so drop the lock first */
> +	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
> +
> +	if (ret == ENOMEM)
> +		ibmvnic_close(netdev);
> +
>  	return -ret;
>  }
> 
> @@ -5512,7 +5514,18 @@ static int ibmvnic_remove(struct vio_dev *dev)
>  	unsigned long flags;
> 
>  	spin_lock_irqsave(&adapter->state_lock, flags);
> +
> +	/* If ibmvnic_reset() is scheduling a reset, wait for it to
> +	 * finish. Then, set the state to REMOVING to prevent it from
> +	 * scheduling any more work and to have reset functions ignore
> +	 * any resets that have already been scheduled. Drop the lock
> +	 * after setting state, so __ibmvnic_reset() which is called
> +	 * from the flush_work() below, can make progress.
> +	 */
> +	spin_lock_irqsave(&adapter->rwi_lock, flags);
>  	adapter->state = VNIC_REMOVING;
> +	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
> +
>  	spin_unlock_irqrestore(&adapter->state_lock, flags);
> 
>  	flush_work(&adapter->ibmvnic_reset);
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.h
> b/drivers/net/ethernet/ibm/ibmvnic.h
> index c09c3f6bba9f..3cccbba70365 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.h
> +++ b/drivers/net/ethernet/ibm/ibmvnic.h
> @@ -1081,6 +1081,7 @@ struct ibmvnic_adapter {
>  	struct tasklet_struct tasklet;
>  	enum vnic_state state;
>  	enum ibmvnic_reset_reason reset_reason;
> +	/* when taking both state and rwi locks, take state lock first */
>  	spinlock_t rwi_lock;
>  	struct list_head rwi_list;
>  	struct work_struct ibmvnic_reset;
> @@ -1097,6 +1098,8 @@ struct ibmvnic_adapter {
>  	struct ibmvnic_tunables desired;
>  	struct ibmvnic_tunables fallback;
> 
> -	/* Used for serializatin of state field */
> +	/* Used for serialization of state field. When taking both state
> +	 * and rwi locks, take state lock first.
> +	 */
>  	spinlock_t state_lock;
>  };
patchwork-bot+netdevbpf@kernel.org Feb. 15, 2021, 11:20 p.m. UTC | #2
Hello:

This patch was applied to netdev/net.git (refs/heads/master):

On Fri, 12 Feb 2021 20:42:50 -0800 you wrote:
> The work queue is used to queue reset requests like CHANGE-PARAM or
> FAILOVER resets for the worker thread. When the adapter is being removed
> the adapter state is set to VNIC_REMOVING and the work queue is flushed
> so no new work is added. However the check for adapter being removed is
> racy in that the adapter can go into REMOVING state just after we check
> and we might end up adding work just as it is being flushed (or after).
> 
> [...]

Here is the summary with links:
  - [net,1/1] ibmvnic: serialize access to work queue on remove
    https://git.kernel.org/netdev/net/c/4a41c421f367

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index ce6b1cb0b0f9..004565b18a03 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2442,6 +2442,8 @@  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	unsigned long flags;
 	int ret;
 
+	spin_lock_irqsave(&adapter->rwi_lock, flags);
+
 	/*
 	 * If failover is pending don't schedule any other reset.
 	 * Instead let the failover complete. If there is already a
@@ -2462,14 +2464,11 @@  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 		goto err;
 	}
 
-	spin_lock_irqsave(&adapter->rwi_lock, flags);
-
 	list_for_each(entry, &adapter->rwi_list) {
 		tmp = list_entry(entry, struct ibmvnic_rwi, list);
 		if (tmp->reset_reason == reason) {
 			netdev_dbg(netdev, "Skipping matching reset, reason=%d\n",
 				   reason);
-			spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 			ret = EBUSY;
 			goto err;
 		}
@@ -2477,8 +2476,6 @@  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 
 	rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
 	if (!rwi) {
-		spin_unlock_irqrestore(&adapter->rwi_lock, flags);
-		ibmvnic_close(netdev);
 		ret = ENOMEM;
 		goto err;
 	}
@@ -2491,12 +2488,17 @@  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	}
 	rwi->reset_reason = reason;
 	list_add_tail(&rwi->list, &adapter->rwi_list);
-	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
 	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
 	schedule_work(&adapter->ibmvnic_reset);
 
-	return 0;
+	ret = 0;
 err:
+	/* ibmvnic_close() below can block, so drop the lock first */
+	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+
+	if (ret == ENOMEM)
+		ibmvnic_close(netdev);
+
 	return -ret;
 }
 
@@ -5512,7 +5514,18 @@  static int ibmvnic_remove(struct vio_dev *dev)
 	unsigned long flags;
 
 	spin_lock_irqsave(&adapter->state_lock, flags);
+
+	/* If ibmvnic_reset() is scheduling a reset, wait for it to
+	 * finish. Then, set the state to REMOVING to prevent it from
+	 * scheduling any more work and to have reset functions ignore
+	 * any resets that have already been scheduled. Drop the lock
+	 * after setting state, so __ibmvnic_reset() which is called
+	 * from the flush_work() below, can make progress.
+	 */
+	spin_lock_irqsave(&adapter->rwi_lock, flags);
 	adapter->state = VNIC_REMOVING;
+	spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+
 	spin_unlock_irqrestore(&adapter->state_lock, flags);
 
 	flush_work(&adapter->ibmvnic_reset);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index c09c3f6bba9f..3cccbba70365 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1081,6 +1081,7 @@  struct ibmvnic_adapter {
 	struct tasklet_struct tasklet;
 	enum vnic_state state;
 	enum ibmvnic_reset_reason reset_reason;
+	/* when taking both state and rwi locks, take state lock first */
 	spinlock_t rwi_lock;
 	struct list_head rwi_list;
 	struct work_struct ibmvnic_reset;
@@ -1097,6 +1098,8 @@  struct ibmvnic_adapter {
 	struct ibmvnic_tunables desired;
 	struct ibmvnic_tunables fallback;
 
-	/* Used for serializatin of state field */
+	/* Used for serialization of state field. When taking both state
+	 * and rwi locks, take state lock first.
+	 */
 	spinlock_t state_lock;
 };