diff mbox series

[net,3/3] ibmvnic: delay complete()

Message ID 20211029220316.2003519-3-sukadev@linux.ibm.com (mailing list archive)
State Accepted
Commit 6b278c0cb378079f3c0c61ae4a369c09ff1a4188
Delegated to: Netdev Maintainers
Headers show
Series [net,1/3] ibmvnic: don't stop queue in xmit | expand

Checks

Context Check Description
netdev/cover_letter warning Series does not have a cover letter
netdev/fixes_present success Fixes tag present in non-next series
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/cc_maintainers warning 7 maintainers not CCed: mpe@ellerman.id.au benh@kernel.crashing.org tlfalcon@linux.ibm.com linuxppc-dev@lists.ozlabs.org paulus@samba.org davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Fixes tag looks correct
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 40 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success No static functions without inline keyword in header files

Commit Message

Sukadev Bhattiprolu Oct. 29, 2021, 10:03 p.m. UTC
If we get CRQ_INIT, we set errno to -EIO and first call complete() to
notify the waiter. Then we try to schedule a FAILOVER reset. If this
occurs while adapter is in PROBING state, ibmvnic_reset() changes the
error code to EAGAIN and returns without scheduling the FAILOVER. The
purpose of setting error code to EAGAIN is to ask the waiter to retry.

But due to the earlier complete() call, the waiter may already have seen
the -EIO response and decided not to retry. This can cause intermittent
failures when bringing up ibmvnic adapters during boot, specially in
in kexec/kdump kernels.

Defer the complete() call until after scheduling the reset.

Also streamline the error code to EAGAIN. Don't see why we need EIO
sometimes. All 3 callers of ibmvnic_reset_init() can handle EAGAIN.

Fixes: 17c8705838a5 ("ibmvnic: Return error code if init interrupted by transport event")
Reported-by: Vaishnavi Bhat <vaish123@in.ibm.com>
Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

Comments

Dany Madden Oct. 29, 2021, 10:25 p.m. UTC | #1
On 2021-10-29 15:03, Sukadev Bhattiprolu wrote:
> If we get CRQ_INIT, we set errno to -EIO and first call complete() to
> notify the waiter. Then we try to schedule a FAILOVER reset. If this
> occurs while adapter is in PROBING state, ibmvnic_reset() changes the
> error code to EAGAIN and returns without scheduling the FAILOVER. The
> purpose of setting error code to EAGAIN is to ask the waiter to retry.
> 
> But due to the earlier complete() call, the waiter may already have 
> seen
> the -EIO response and decided not to retry. This can cause intermittent
> failures when bringing up ibmvnic adapters during boot, specially in
> in kexec/kdump kernels.
> 
> Defer the complete() call until after scheduling the reset.
> 
> Also streamline the error code to EAGAIN. Don't see why we need EIO
> sometimes. All 3 callers of ibmvnic_reset_init() can handle EAGAIN.
> 
> Fixes: 17c8705838a5 ("ibmvnic: Return error code if init interrupted
> by transport event")
> Reported-by: Vaishnavi Bhat <vaish123@in.ibm.com>
> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.ibm.com>

Reviewed-by: Dany Madden <drt@linux.ibm.com>

> ---
>  drivers/net/ethernet/ibm/ibmvnic.c | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ibm/ibmvnic.c
> b/drivers/net/ethernet/ibm/ibmvnic.c
> index 50956f622b11..29cbf60dfd79 100644
> --- a/drivers/net/ethernet/ibm/ibmvnic.c
> +++ b/drivers/net/ethernet/ibm/ibmvnic.c
> @@ -2755,7 +2755,7 @@ static int ibmvnic_reset(struct ibmvnic_adapter 
> *adapter,
> 
>  	if (adapter->state == VNIC_PROBING) {
>  		netdev_warn(netdev, "Adapter reset during probe\n");
> -		adapter->init_done_rc = EAGAIN;
> +		adapter->init_done_rc = -EAGAIN;
>  		ret = EAGAIN;
>  		goto err;
>  	}
> @@ -5266,11 +5266,6 @@ static void ibmvnic_handle_crq(union ibmvnic_crq 
> *crq,
>  			 */
>  			adapter->login_pending = false;
> 
> -			if (!completion_done(&adapter->init_done)) {
> -				complete(&adapter->init_done);
> -				adapter->init_done_rc = -EIO;
> -			}
> -
>  			if (adapter->state == VNIC_DOWN)
>  				rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
>  			else
> @@ -5291,6 +5286,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq 
> *crq,
>  					   rc);
>  				adapter->failover_pending = false;
>  			}
> +
> +			if (!completion_done(&adapter->init_done)) {
> +				complete(&adapter->init_done);
> +				if (!adapter->init_done_rc)
> +					adapter->init_done_rc = -EAGAIN;
> +			}
> +
>  			break;
>  		case IBMVNIC_CRQ_INIT_COMPLETE:
>  			dev_info(dev, "Partner initialization complete\n");
> @@ -5763,7 +5765,7 @@ static int ibmvnic_probe(struct vio_dev *dev,
> const struct vio_device_id *id)
>  		}
> 
>  		rc = ibmvnic_reset_init(adapter, false);
> -	} while (rc == EAGAIN);
> +	} while (rc == -EAGAIN);
> 
>  	/* We are ignoring the error from ibmvnic_reset_init() assuming that 
> the
>  	 * partner is not ready. CRQ is not active. When the partner becomes
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 50956f622b11..29cbf60dfd79 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2755,7 +2755,7 @@  static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
 
 	if (adapter->state == VNIC_PROBING) {
 		netdev_warn(netdev, "Adapter reset during probe\n");
-		adapter->init_done_rc = EAGAIN;
+		adapter->init_done_rc = -EAGAIN;
 		ret = EAGAIN;
 		goto err;
 	}
@@ -5266,11 +5266,6 @@  static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			 */
 			adapter->login_pending = false;
 
-			if (!completion_done(&adapter->init_done)) {
-				complete(&adapter->init_done);
-				adapter->init_done_rc = -EIO;
-			}
-
 			if (adapter->state == VNIC_DOWN)
 				rc = ibmvnic_reset(adapter, VNIC_RESET_PASSIVE_INIT);
 			else
@@ -5291,6 +5286,13 @@  static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 					   rc);
 				adapter->failover_pending = false;
 			}
+
+			if (!completion_done(&adapter->init_done)) {
+				complete(&adapter->init_done);
+				if (!adapter->init_done_rc)
+					adapter->init_done_rc = -EAGAIN;
+			}
+
 			break;
 		case IBMVNIC_CRQ_INIT_COMPLETE:
 			dev_info(dev, "Partner initialization complete\n");
@@ -5763,7 +5765,7 @@  static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		}
 
 		rc = ibmvnic_reset_init(adapter, false);
-	} while (rc == EAGAIN);
+	} while (rc == -EAGAIN);
 
 	/* We are ignoring the error from ibmvnic_reset_init() assuming that the
 	 * partner is not ready. CRQ is not active. When the partner becomes