diff mbox series

[04/11] cifs: serialize channel reconnects

Message ID 20230310153211.10982-4-sprasad@microsoft.com (mailing list archive)
State New, archived
Headers show
Series [01/11] cifs: fix tcon status change after tree connect | expand

Commit Message

Shyam Prasad N March 10, 2023, 3:32 p.m. UTC
Parallel session reconnects are prone to race conditions
that are difficult to avoid cleanly. The changes so far do
ensure that parallel reconnects eventually go through.
But that can take multiple session setups on the same channel.

Avoiding that by serializing the session setups on parallel
channels. In doing so, we should avoid such issues.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/cifs/connect.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

Comments

Steve French March 10, 2023, 10:40 p.m. UTC | #1
Paulo had a few comments on patch 3 of the series, so have applied the
first two, and will wait on patch 3 and 4, but tentatively merged the
first two:

a7c73e3d5842 (HEAD -> for-next, origin/for-next) cifs: generate
signkey for the channel that's reconnecting
d169aadd50c7 cifs: fix tcon status change after tree connect

On Fri, Mar 10, 2023 at 9:33 AM Shyam Prasad N <nspmangalore@gmail.com> wrote:
>
> Parallel session reconnects are prone to race conditions
> that are difficult to avoid cleanly. The changes so far do
> ensure that parallel reconnects eventually go through.
> But that can take multiple session setups on the same channel.
>
> Avoiding that by serializing the session setups on parallel
> channels. In doing so, we should avoid such issues.
>
> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> ---
>  fs/cifs/connect.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index 7b103f69432e..4ea1e51c3fa5 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -222,6 +222,11 @@ cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
>                 else
>                         cifs_chan_set_need_reconnect(ses, server);
>
> +               cifs_dbg(FYI, "%s: channel connect bitmaps: 0x%lx 0x%lx\n",
> +                        __func__,
> +                        ses->chans_need_reconnect,
> +                        ses->chans_in_reconnect);
> +
>                 /* If all channels need reconnect, then tcon needs reconnect */
>                 if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
>                         spin_unlock(&ses->chan_lock);
> @@ -3744,6 +3749,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
>         retries = 5;
>
>         spin_lock(&ses->ses_lock);
> +       cifs_dbg(FYI, "%s: channel connect bitmaps: 0x%lx 0x%lx\n",
> +                __func__,
> +                ses->chans_need_reconnect,
> +                ses->chans_in_reconnect);
> +
>         if (ses->ses_status != SES_GOOD &&
>             ses->ses_status != SES_NEW &&
>             ses->ses_status != SES_NEED_RECON) {
> @@ -3762,11 +3772,11 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
>         }
>
>         /* another process is in the processs of sess setup */
> -       while (cifs_chan_in_reconnect(ses, server)) {
> +       while (ses->chans_in_reconnect) {
>                 spin_unlock(&ses->chan_lock);
>                 spin_unlock(&ses->ses_lock);
>                 rc = wait_event_interruptible_timeout(ses->reconnect_q,
> -                                                     (!cifs_chan_in_reconnect(ses, server)),
> +                                                     (!ses->chans_in_reconnect),
>                                                       HZ);
>                 if (rc < 0) {
>                         cifs_dbg(FYI, "%s: aborting sess setup due to a received signal by the process\n",
> @@ -3776,8 +3786,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
>                 spin_lock(&ses->ses_lock);
>                 spin_lock(&ses->chan_lock);
>
> -               /* are we still trying to reconnect? */
> -               if (!cifs_chan_in_reconnect(ses, server)) {
> +               /* did the bitmask change? */
> +               if (!ses->chans_in_reconnect) {
>                         spin_unlock(&ses->chan_lock);
>                         spin_unlock(&ses->ses_lock);
>                         goto check_again;
> --
> 2.34.1
>
diff mbox series

Patch

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7b103f69432e..4ea1e51c3fa5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -222,6 +222,11 @@  cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server,
 		else
 			cifs_chan_set_need_reconnect(ses, server);
 
+		cifs_dbg(FYI, "%s: channel connect bitmaps: 0x%lx 0x%lx\n",
+			 __func__,
+			 ses->chans_need_reconnect,
+			 ses->chans_in_reconnect);
+
 		/* If all channels need reconnect, then tcon needs reconnect */
 		if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) {
 			spin_unlock(&ses->chan_lock);
@@ -3744,6 +3749,11 @@  cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 	retries = 5;
 
 	spin_lock(&ses->ses_lock);
+	cifs_dbg(FYI, "%s: channel connect bitmaps: 0x%lx 0x%lx\n",
+		 __func__,
+		 ses->chans_need_reconnect,
+		 ses->chans_in_reconnect);
+
 	if (ses->ses_status != SES_GOOD &&
 	    ses->ses_status != SES_NEW &&
 	    ses->ses_status != SES_NEED_RECON) {
@@ -3762,11 +3772,11 @@  cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 	}
 
 	/* another process is in the processs of sess setup */
-	while (cifs_chan_in_reconnect(ses, server)) {
+	while (ses->chans_in_reconnect) {
 		spin_unlock(&ses->chan_lock);
 		spin_unlock(&ses->ses_lock);
 		rc = wait_event_interruptible_timeout(ses->reconnect_q,
-						      (!cifs_chan_in_reconnect(ses, server)),
+						      (!ses->chans_in_reconnect),
 						      HZ);
 		if (rc < 0) {
 			cifs_dbg(FYI, "%s: aborting sess setup due to a received signal by the process\n",
@@ -3776,8 +3786,8 @@  cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 		spin_lock(&ses->ses_lock);
 		spin_lock(&ses->chan_lock);
 
-		/* are we still trying to reconnect? */
-		if (!cifs_chan_in_reconnect(ses, server)) {
+		/* did the bitmask change? */
+		if (!ses->chans_in_reconnect) {
 			spin_unlock(&ses->chan_lock);
 			spin_unlock(&ses->ses_lock);
 			goto check_again;