diff mbox series

[v2,1/2] md-cluster: fix hanging issue while a new disk adding

Message ID 20240709104120.22243-1-heming.zhao@suse.com (mailing list archive)
State Accepted
Headers show
Series [v2,1/2] md-cluster: fix hanging issue while a new disk adding | expand

Checks

Context Check Description
mdraidci/vmtest-md-6_11-VM_Test-1 success Logs for build-kernel
mdraidci/vmtest-md-6_11-PR success PR summary
mdraidci/vmtest-md-6_11-VM_Test-0 success Logs for build-kernel

Commit Message

heming.zhao@suse.com July 9, 2024, 10:41 a.m. UTC
The commit 1bbe254e4336 ("md-cluster: check for timeout while a
new disk adding") is correct in terms of code syntax but not
suite real clustered code logic.

When a timeout occurs while adding a new disk, if recv_daemon()
bypasses the unlock for ack_lockres:CR, another node will be waiting
to grab EX lock. This will cause the cluster to hang indefinitely.

How to fix:

1. In dlm_lock_sync(), change the wait behaviour from forever to a
   timeout, This could avoid the hanging issue when another node
   fails to handle cluster msg. Another result of this change is
   that if another node receives an unknown msg (e.g. a new msg_type),
   the old code will hang, whereas the new code will timeout and fail.
   This could help cluster_md handle new msg_type from different
   nodes with different kernel/module versions (e.g. The user only
   updates one leg's kernel and monitors the stability of the new
   kernel).
2. The old code for __sendmsg() always returns 0 (success) under the
   design (must successfully unlock ->message_lockres). This commit
   makes this function return an error number when an error occurs.

Fixes: 1bbe254e4336 ("md-cluster: check for timeout while a new disk adding")
Signed-off-by: Heming Zhao <heming.zhao@suse.com>
Reviewed-by: Su Yue <glass.su@suse.com>
---
v1 -> v2:
- use define WAIT_DLM_LOCK_TIMEOUT instead of hard code
- change timeout value from 60s to 30s
- follow Kuai's suggestion to use while loop to unlock message_lockres
---
 drivers/md/md-cluster.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

Comments

Yu Kuai July 9, 2024, 11:06 a.m. UTC | #1
在 2024/07/09 18:41, Heming Zhao 写道:
> The commit 1bbe254e4336 ("md-cluster: check for timeout while a
> new disk adding") is correct in terms of code syntax but not
> suite real clustered code logic.
> 
> When a timeout occurs while adding a new disk, if recv_daemon()
> bypasses the unlock for ack_lockres:CR, another node will be waiting
> to grab EX lock. This will cause the cluster to hang indefinitely.
> 
> How to fix:
> 
> 1. In dlm_lock_sync(), change the wait behaviour from forever to a
>     timeout, This could avoid the hanging issue when another node
>     fails to handle cluster msg. Another result of this change is
>     that if another node receives an unknown msg (e.g. a new msg_type),
>     the old code will hang, whereas the new code will timeout and fail.
>     This could help cluster_md handle new msg_type from different
>     nodes with different kernel/module versions (e.g. The user only
>     updates one leg's kernel and monitors the stability of the new
>     kernel).
> 2. The old code for __sendmsg() always returns 0 (success) under the
>     design (must successfully unlock ->message_lockres). This commit
>     makes this function return an error number when an error occurs.
> 
> Fixes: 1bbe254e4336 ("md-cluster: check for timeout while a new disk adding")
> Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> Reviewed-by: Su Yue <glass.su@suse.com>

Thanks for the patch.

Acked-by: Yu Kuai <yukuai3@huawei.com>
> ---
> v1 -> v2:
> - use define WAIT_DLM_LOCK_TIMEOUT instead of hard code
> - change timeout value from 60s to 30s
> - follow Kuai's suggestion to use while loop to unlock message_lockres
> ---
>   drivers/md/md-cluster.c | 22 ++++++++++++----------
>   1 file changed, 12 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
> index 8e36a0feec09..b5a802ae17bb 100644
> --- a/drivers/md/md-cluster.c
> +++ b/drivers/md/md-cluster.c
> @@ -15,6 +15,7 @@
>   
>   #define LVB_SIZE	64
>   #define NEW_DEV_TIMEOUT 5000
> +#define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
>   
>   struct dlm_lock_resource {
>   	dlm_lockspace_t *ls;
> @@ -130,8 +131,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
>   			0, sync_ast, res, res->bast);
>   	if (ret)
>   		return ret;
> -	wait_event(res->sync_locking, res->sync_locking_done);
> +	ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
> +				WAIT_DLM_LOCK_TIMEOUT);
>   	res->sync_locking_done = false;
> +	if (!ret) {
> +		pr_err("locking DLM '%s' timeout!\n", res->name);
> +		return -EBUSY;
> +	}
>   	if (res->lksb.sb_status == 0)
>   		res->mode = mode;
>   	return res->lksb.sb_status;
> @@ -743,7 +749,7 @@ static void unlock_comm(struct md_cluster_info *cinfo)
>    */
>   static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>   {
> -	int error;
> +	int error, unlock_error;
>   	int slot = cinfo->slot_number - 1;
>   
>   	cmsg->slot = cpu_to_le32(slot);
> @@ -751,7 +757,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>   	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
>   	if (error) {
>   		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
> -		goto failed_message;
> +		return error;
>   	}
>   
>   	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
> @@ -781,14 +787,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
>   	}
>   
>   failed_ack:
> -	error = dlm_unlock_sync(cinfo->message_lockres);
> -	if (unlikely(error != 0)) {
> +	while ((unlock_error = dlm_unlock_sync(cinfo->message_lockres)))
>   		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
> -			error);
> -		/* in case the message can't be released due to some reason */
> -		goto failed_ack;
> -	}
> -failed_message:
> +			unlock_error);
> +
>   	return error;
>   }
>   
>
Song Liu July 12, 2024, 3:09 p.m. UTC | #2
On Tue, Jul 9, 2024 at 7:06 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> 在 2024/07/09 18:41, Heming Zhao 写道:
> > The commit 1bbe254e4336 ("md-cluster: check for timeout while a
> > new disk adding") is correct in terms of code syntax but not
> > suite real clustered code logic.
> >
> > When a timeout occurs while adding a new disk, if recv_daemon()
> > bypasses the unlock for ack_lockres:CR, another node will be waiting
> > to grab EX lock. This will cause the cluster to hang indefinitely.
> >
> > How to fix:
> >
> > 1. In dlm_lock_sync(), change the wait behaviour from forever to a
> >     timeout, This could avoid the hanging issue when another node
> >     fails to handle cluster msg. Another result of this change is
> >     that if another node receives an unknown msg (e.g. a new msg_type),
> >     the old code will hang, whereas the new code will timeout and fail.
> >     This could help cluster_md handle new msg_type from different
> >     nodes with different kernel/module versions (e.g. The user only
> >     updates one leg's kernel and monitors the stability of the new
> >     kernel).
> > 2. The old code for __sendmsg() always returns 0 (success) under the
> >     design (must successfully unlock ->message_lockres). This commit
> >     makes this function return an error number when an error occurs.
> >
> > Fixes: 1bbe254e4336 ("md-cluster: check for timeout while a new disk adding")
> > Signed-off-by: Heming Zhao <heming.zhao@suse.com>
> > Reviewed-by: Su Yue <glass.su@suse.com>
>
> Thanks for the patch.
>
> Acked-by: Yu Kuai <yukuai3@huawei.com>

Applied to md-6.11. Thanks!

Song
diff mbox series

Patch

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 8e36a0feec09..b5a802ae17bb 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -15,6 +15,7 @@ 
 
 #define LVB_SIZE	64
 #define NEW_DEV_TIMEOUT 5000
+#define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
 
 struct dlm_lock_resource {
 	dlm_lockspace_t *ls;
@@ -130,8 +131,13 @@  static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
 			0, sync_ast, res, res->bast);
 	if (ret)
 		return ret;
-	wait_event(res->sync_locking, res->sync_locking_done);
+	ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
+				WAIT_DLM_LOCK_TIMEOUT);
 	res->sync_locking_done = false;
+	if (!ret) {
+		pr_err("locking DLM '%s' timeout!\n", res->name);
+		return -EBUSY;
+	}
 	if (res->lksb.sb_status == 0)
 		res->mode = mode;
 	return res->lksb.sb_status;
@@ -743,7 +749,7 @@  static void unlock_comm(struct md_cluster_info *cinfo)
  */
 static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 {
-	int error;
+	int error, unlock_error;
 	int slot = cinfo->slot_number - 1;
 
 	cmsg->slot = cpu_to_le32(slot);
@@ -751,7 +757,7 @@  static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
 	if (error) {
 		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
-		goto failed_message;
+		return error;
 	}
 
 	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
@@ -781,14 +787,10 @@  static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	}
 
 failed_ack:
-	error = dlm_unlock_sync(cinfo->message_lockres);
-	if (unlikely(error != 0)) {
+	while ((unlock_error = dlm_unlock_sync(cinfo->message_lockres)))
 		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
-			error);
-		/* in case the message can't be released due to some reason */
-		goto failed_ack;
-	}
-failed_message:
+			unlock_error);
+
 	return error;
 }