[v2] CIFS: Fix a possible memory corruption during reconnect
diff mbox

Message ID 1478820683-2954-1-git-send-email-pshilov@microsoft.com
State New
Headers show

Commit Message

Pavel Shilovsky Nov. 10, 2016, 11:31 p.m. UTC
We can not unlock/lock cifs_tcp_ses_lock while walking through ses
and tcon lists because it can corrupt list iterator pointers and
a tcon structure can be released if we don't hold an extra reference.
Fix it by moving a reconnect process to a separate delayed work
and acquiring a reference to every tcon that needs to be reconnected.
Also do not send an echo request on newly established connections.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsglob.h  |  3 +++
 fs/cifs/cifsproto.h |  3 +++
 fs/cifs/connect.c   | 34 +++++++++++++++++++-----
 fs/cifs/smb2pdu.c   | 75 ++++++++++++++++++++++++++++++++++++-----------------
 fs/cifs/smb2proto.h |  1 +
 5 files changed, 85 insertions(+), 31 deletions(-)

Comments

Pavel Shilovsky Nov. 23, 2016, 8:07 p.m. UTC | #1
2016-11-10 15:31 GMT-08:00 Pavel Shilovsky <piastryyy@gmail.com>:
> We can not unlock/lock cifs_tcp_ses_lock while walking through ses
> and tcon lists because it can corrupt list iterator pointers and
> a tcon structure can be released if we don't hold an extra reference.
> Fix it by moving a reconnect process to a separate delayed work
> and acquiring a reference to every tcon that needs to be reconnected.
> Also do not send an echo request on newly established connections.
>
> CC: Stable <stable@vger.kernel.org>
> Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
> ---
>  fs/cifs/cifsglob.h  |  3 +++
>  fs/cifs/cifsproto.h |  3 +++
>  fs/cifs/connect.c   | 34 +++++++++++++++++++-----
>  fs/cifs/smb2pdu.c   | 75 ++++++++++++++++++++++++++++++++++++-----------------
>  fs/cifs/smb2proto.h |  1 +
>  5 files changed, 85 insertions(+), 31 deletions(-)
>
> diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> index 1f17f6b..6dcefc8 100644
> --- a/fs/cifs/cifsglob.h
> +++ b/fs/cifs/cifsglob.h
> @@ -632,6 +632,7 @@ struct TCP_Server_Info {
>         bool    sec_mskerberos;         /* supports legacy MS Kerberos */
>         bool    large_buf;              /* is current buffer large? */
>         struct delayed_work     echo; /* echo ping workqueue job */
> +       struct delayed_work     reconnect; /* reconnect workqueue job */
>         char    *smallbuf;      /* pointer to current "small" buffer */
>         char    *bigbuf;        /* pointer to current "big" buffer */
>         unsigned int total_read; /* total amount of data read in this pass */
> @@ -648,6 +649,7 @@ struct TCP_Server_Info {
>         __u8            preauth_hash[512];
>  #endif /* CONFIG_CIFS_SMB2 */
>         unsigned long echo_interval;
> +       struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
>  };
>
>  static inline unsigned int
> @@ -850,6 +852,7 @@ struct cifs_tcon {
>         struct list_head tcon_list;
>         int tc_count;
>         struct list_head openFileList;
> +       struct list_head rlist; /* reconnect list */
>         spinlock_t open_file_lock; /* protects list above */
>         struct cifs_ses *ses;   /* pointer to session associated with */
>         char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
> diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
> index ced0e42..cd8025a 100644
> --- a/fs/cifs/cifsproto.h
> +++ b/fs/cifs/cifsproto.h
> @@ -206,6 +206,9 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
>                                          struct tcon_link *tlink,
>                                          struct cifs_pending_open *open);
>  extern void cifs_del_pending_open(struct cifs_pending_open *open);
> +extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
> +                                int from_reconnect);
> +extern void cifs_put_tcon(struct cifs_tcon *tcon);
>
>  #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
>  extern void cifs_dfs_release_automount_timer(void);
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index 4547aed..75503af 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -52,6 +52,9 @@
>  #include "nterr.h"
>  #include "rfc1002pdu.h"
>  #include "fscache.h"
> +#ifdef CONFIG_CIFS_SMB2
> +#include "smb2proto.h"
> +#endif
>
>  #define CIFS_PORT 445
>  #define RFC1001_PORT 139
> @@ -2100,8 +2103,8 @@ cifs_find_tcp_session(struct smb_vol *vol)
>         return NULL;
>  }
>
> -static void
> -cifs_put_tcp_session(struct TCP_Server_Info *server)
> +void
> +cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
>  {
>         struct task_struct *task;
>
> @@ -2118,6 +2121,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
>
>         cancel_delayed_work_sync(&server->echo);
>
> +#ifdef CONFIG_CIFS_SMB2
> +       if (from_reconnect)
> +               /*
> +                * Avoid deadlock here: reconnect work calls
> +                * cifs_put_tcp_session() at its end. Need to be sure
> +                * that reconnect work does nothing with server pointer after
> +                * that step.
> +                */
> +               cancel_delayed_work(&server->reconnect);
> +       else
> +               cancel_delayed_work_sync(&server->reconnect);
> +#endif
> +
>         spin_lock(&GlobalMid_Lock);
>         server->tcpStatus = CifsExiting;
>         spin_unlock(&GlobalMid_Lock);
> @@ -2171,6 +2187,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
>         init_waitqueue_head(&tcp_ses->request_q);
>         INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
>         mutex_init(&tcp_ses->srv_mutex);
> +       mutex_init(&tcp_ses->reconnect_mutex);
>         memcpy(tcp_ses->workstation_RFC1001_name,
>                 volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
>         memcpy(tcp_ses->server_RFC1001_name,
> @@ -2182,6 +2199,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
>         INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
>         INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
>         INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
> +#ifdef CONFIG_CIFS_SMB2
> +       INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
> +#endif
>         memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
>                sizeof(tcp_ses->srcaddr));
>         memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
> @@ -2340,7 +2360,7 @@ cifs_put_smb_ses(struct cifs_ses *ses)
>         spin_unlock(&cifs_tcp_ses_lock);
>
>         sesInfoFree(ses);
> -       cifs_put_tcp_session(server);
> +       cifs_put_tcp_session(server, 0);
>  }
>
>  #ifdef CONFIG_KEYS
> @@ -2514,7 +2534,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
>                 mutex_unlock(&ses->session_mutex);
>
>                 /* existing SMB ses has a server reference already */
> -               cifs_put_tcp_session(server);
> +               cifs_put_tcp_session(server, 0);
>                 free_xid(xid);
>                 return ses;
>         }
> @@ -2604,7 +2624,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc)
>         return NULL;
>  }
>
> -static void
> +void
>  cifs_put_tcon(struct cifs_tcon *tcon)
>  {
>         unsigned int xid;
> @@ -3792,7 +3812,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
>                 else if (ses)
>                         cifs_put_smb_ses(ses);
>                 else
> -                       cifs_put_tcp_session(server);
> +                       cifs_put_tcp_session(server, 0);
>                 bdi_destroy(&cifs_sb->bdi);
>         }
>
> @@ -4103,7 +4123,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
>         ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
>         if (IS_ERR(ses)) {
>                 tcon = (struct cifs_tcon *)ses;
> -               cifs_put_tcp_session(master_tcon->ses->server);
> +               cifs_put_tcp_session(master_tcon->ses->server, 0);
>                 goto out;
>         }
>
> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
> index 5ca5ea46..950dbf7 100644
> --- a/fs/cifs/smb2pdu.c
> +++ b/fs/cifs/smb2pdu.c
> @@ -1972,6 +1972,54 @@ smb2_echo_callback(struct mid_q_entry *mid)
>         add_credits(server, credits_received, CIFS_ECHO_OP);
>  }
>
> +void smb2_reconnect_server(struct work_struct *work)
> +{
> +       struct TCP_Server_Info *server = container_of(work,
> +                                       struct TCP_Server_Info, reconnect.work);
> +       struct cifs_ses *ses;
> +       struct cifs_tcon *tcon, *tcon2;
> +       struct list_head tmp_list;
> +       int tcon_exist = false;
> +
> +       /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
> +       mutex_lock(&server->reconnect_mutex);
> +
> +       INIT_LIST_HEAD(&tmp_list);
> +       cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
> +
> +       spin_lock(&cifs_tcp_ses_lock);
> +       list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
> +               list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
> +                       if (tcon && tcon->need_reconnect) {
> +                               tcon->tc_count++;
> +                               list_add_tail(&tcon->rlist, &tmp_list);
> +                               tcon_exist = true;
> +                       }
> +               }
> +       }
> +       /*
> +        * Get the reference to server struct to be sure that the last call of
> +        * cifs_put_tcon() in the loop below won't release the server pointer.
> +        */
> +       if (tcon_exist)
> +               server->srv_count++;
> +
> +       spin_unlock(&cifs_tcp_ses_lock);
> +
> +       list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
> +               smb2_reconnect(SMB2_ECHO, tcon);
> +               list_del_init(&tcon->rlist);
> +               cifs_put_tcon(tcon);
> +       }
> +
> +       cifs_dbg(FYI, "Reconnecting tcons finished\n");
> +       mutex_unlock(&server->reconnect_mutex);
> +
> +       /* now we can safely release srv struct */
> +       if (tcon_exist)
> +               cifs_put_tcp_session(server, 1);
> +}
> +
>  int
>  SMB2_echo(struct TCP_Server_Info *server)
>  {
> @@ -1984,32 +2032,11 @@ SMB2_echo(struct TCP_Server_Info *server)
>         cifs_dbg(FYI, "In echo request\n");
>
>         if (server->tcpStatus == CifsNeedNegotiate) {
> -               struct list_head *tmp, *tmp2;
> -               struct cifs_ses *ses;
> -               struct cifs_tcon *tcon;
> -
> -               cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
> -               spin_lock(&cifs_tcp_ses_lock);
> -               list_for_each(tmp, &server->smb_ses_list) {
> -                       ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
> -                       list_for_each(tmp2, &ses->tcon_list) {
> -                               tcon = list_entry(tmp2, struct cifs_tcon,
> -                                                 tcon_list);
> -                               /* add check for persistent handle reconnect */
> -                               if (tcon && tcon->need_reconnect) {
> -                                       spin_unlock(&cifs_tcp_ses_lock);
> -                                       rc = smb2_reconnect(SMB2_ECHO, tcon);
> -                                       spin_lock(&cifs_tcp_ses_lock);
> -                               }
> -                       }
> -               }
> -               spin_unlock(&cifs_tcp_ses_lock);
> +               /* No need to send echo on newly established connections */
> +               mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
> +               return rc;
>         }
>
> -       /* if no session, renegotiate failed above */
> -       if (server->tcpStatus == CifsNeedNegotiate)
> -               return -EIO;
> -
>         rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
>         if (rc)
>                 return rc;
> diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
> index eb2cde2..f2d511a 100644
> --- a/fs/cifs/smb2proto.h
> +++ b/fs/cifs/smb2proto.h
> @@ -96,6 +96,7 @@ extern int smb2_open_file(const unsigned int xid,
>  extern int smb2_unlock_range(struct cifsFileInfo *cfile,
>                              struct file_lock *flock, const unsigned int xid);
>  extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
> +extern void smb2_reconnect_server(struct work_struct *work);
>
>  /*
>   * SMB2 Worker functions - most of protocol specific implementation details
> --
> 2.7.4
>

Any comments on this patch?
Aurelien Aptel Nov. 24, 2016, 4:43 p.m. UTC | #2
Hi Pavel,

Pavel Shilovsky <piastryyy@gmail.com> writes:
> 2016-11-10 15:31 GMT-08:00 Pavel Shilovsky <piastryyy@gmail.com>:
>> We can not unlock/lock cifs_tcp_ses_lock while walking through ses
>> and tcon lists because it can corrupt list iterator pointers and
>> a tcon structure can be released if we don't hold an extra reference.
>> Fix it by moving a reconnect process to a separate delayed work
>> and acquiring a reference to every tcon that needs to be reconnected.
>> Also do not send an echo request on newly established connections.

I don't fully understand what's going on here but I've successfully
tested your patch.

I've applied your patch and triggered a reconnexion on a smb2 mount by
virtually unplugging/waiting/replugging the network cable (via qemu
set_link <name> on/off). I did not notice any issues.

Let us know if you have a better scenario to test this or a way to
reproduce the previous issue.

Tested-by: Aurelien Aptel <aaptel@suse.com>
Pavel Shilovsky Nov. 28, 2016, 7:53 p.m. UTC | #3
2016-11-24 8:43 GMT-08:00 Aurélien Aptel <aaptel@suse.com>:
> Hi Pavel,
>
> Pavel Shilovsky <piastryyy@gmail.com> writes:
>> 2016-11-10 15:31 GMT-08:00 Pavel Shilovsky <piastryyy@gmail.com>:
>>> We can not unlock/lock cifs_tcp_ses_lock while walking through ses
>>> and tcon lists because it can corrupt list iterator pointers and
>>> a tcon structure can be released if we don't hold an extra reference.
>>> Fix it by moving a reconnect process to a separate delayed work
>>> and acquiring a reference to every tcon that needs to be reconnected.
>>> Also do not send an echo request on newly established connections.
>
> I don't fully understand what's going on here but I've successfully
> tested your patch.
>
> I've applied your patch and triggered a reconnexion on a smb2 mount by
> virtually unplugging/waiting/replugging the network cable (via qemu
> set_link <name> on/off). I did not notice any issues.
>
> Let us know if you have a better scenario to test this or a way to
> reproduce the previous issue.
>
> Tested-by: Aurelien Aptel <aaptel@suse.com>
>
> --
> Aurélien Aptel / SUSE Labs Samba Team
> GPG: 1839 CB5F 9F5B FB9B AA97  8C99 03C8 A49B 521B D5D3
> SUSE Linux GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
> GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)

Thank you for testing this!

I don't have a scripted reproducer but sending something wrong to the
server during mounting was triggering this issue. For example, if we
send a wrong SMB2 header on Create request the kernel crashes after
several mount attempts.
Sachin Prabhu Nov. 30, 2016, 10 a.m. UTC | #4
On Thu, 2016-11-10 at 15:31 -0800, Pavel Shilovsky wrote:
> We can not unlock/lock cifs_tcp_ses_lock while walking through ses
> and tcon lists because it can corrupt list iterator pointers and
> a tcon structure can be released if we don't hold an extra reference.
> Fix it by moving a reconnect process to a separate delayed work
> and acquiring a reference to every tcon that needs to be reconnected.
> Also do not send an echo request on newly established connections.
> 
> CC: Stable <stable@vger.kernel.org>
> Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
> ---
>  fs/cifs/cifsglob.h  |  3 +++
>  fs/cifs/cifsproto.h |  3 +++
>  fs/cifs/connect.c   | 34 +++++++++++++++++++-----
>  fs/cifs/smb2pdu.c   | 75 ++++++++++++++++++++++++++++++++++++-------
> ----------
>  fs/cifs/smb2proto.h |  1 +
>  5 files changed, 85 insertions(+), 31 deletions(-)
> 
> diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> index 1f17f6b..6dcefc8 100644
> --- a/fs/cifs/cifsglob.h
> +++ b/fs/cifs/cifsglob.h
> @@ -632,6 +632,7 @@ struct TCP_Server_Info {
>  	bool	sec_mskerberos;		/* supports
> legacy MS Kerberos */
>  	bool	large_buf;		/* is current buffer
> large? */
>  	struct delayed_work	echo; /* echo ping workqueue job
> */
> +	struct delayed_work	reconnect; /* reconnect workqueue
> job */
>  	char	*smallbuf;	/* pointer to current "small"
> buffer */
>  	char	*bigbuf;	/* pointer to current "big"
> buffer */
>  	unsigned int total_read; /* total amount of data read in
> this pass */
> @@ -648,6 +649,7 @@ struct TCP_Server_Info {
>  	__u8		preauth_hash[512];
>  #endif /* CONFIG_CIFS_SMB2 */
>  	unsigned long echo_interval;
> +	struct mutex reconnect_mutex; /* prevent simultaneous
> reconnects */
>  };
>  
>  static inline unsigned int
> @@ -850,6 +852,7 @@ struct cifs_tcon {
>  	struct list_head tcon_list;
>  	int tc_count;
>  	struct list_head openFileList;
> +	struct list_head rlist; /* reconnect list */
>  	spinlock_t open_file_lock; /* protects list above */
>  	struct cifs_ses *ses;	/* pointer to session
> associated with */
>  	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in
> ASCII */
> diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
> index ced0e42..cd8025a 100644
> --- a/fs/cifs/cifsproto.h
> +++ b/fs/cifs/cifsproto.h
> @@ -206,6 +206,9 @@ extern void cifs_add_pending_open_locked(struct
> cifs_fid *fid,
>  					 struct tcon_link *tlink,
>  					 struct cifs_pending_open
> *open);
>  extern void cifs_del_pending_open(struct cifs_pending_open *open);
> +extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
> +				 int from_reconnect);
> +extern void cifs_put_tcon(struct cifs_tcon *tcon);
>  
>  #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
>  extern void cifs_dfs_release_automount_timer(void);
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index 4547aed..75503af 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -52,6 +52,9 @@
>  #include "nterr.h"
>  #include "rfc1002pdu.h"
>  #include "fscache.h"
> +#ifdef CONFIG_CIFS_SMB2
> +#include "smb2proto.h"
> +#endif
>  
>  #define CIFS_PORT 445
>  #define RFC1001_PORT 139
> @@ -2100,8 +2103,8 @@ cifs_find_tcp_session(struct smb_vol *vol)
>  	return NULL;
>  }
>  
> -static void
> -cifs_put_tcp_session(struct TCP_Server_Info *server)
> +void
> +cifs_put_tcp_session(struct TCP_Server_Info *server, int
> from_reconnect)
>  {
>  	struct task_struct *task;
>  
> @@ -2118,6 +2121,19 @@ cifs_put_tcp_session(struct TCP_Server_Info
> *server)
>  
>  	cancel_delayed_work_sync(&server->echo);
>  
> +#ifdef CONFIG_CIFS_SMB2
> +	if (from_reconnect)
> +		/*
> +		 * Avoid deadlock here: reconnect work calls
> +		 * cifs_put_tcp_session() at its end. Need to be
> sure
> +		 * that reconnect work does nothing with server
> pointer after
> +		 * that step.
> +		 */
> +		cancel_delayed_work(&server->reconnect);
> +	else
> +		cancel_delayed_work_sync(&server->reconnect);
> +#endif

Do you need the #ifdef CONFIG_CIFS_SMB2 here considering that there are
no such macro definitions around the declarations around delayed_work
reconnect in struct TCP_Server_Info. It would also lead to unused
variable in case the kernel is compiled without CONFIG_CIFS_SMB2.

> +
>  	spin_lock(&GlobalMid_Lock);
>  	server->tcpStatus = CifsExiting;
>  	spin_unlock(&GlobalMid_Lock);
> @@ -2171,6 +2187,7 @@ cifs_get_tcp_session(struct smb_vol
> *volume_info)
>  	init_waitqueue_head(&tcp_ses->request_q);
>  	INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
>  	mutex_init(&tcp_ses->srv_mutex);
> +	mutex_init(&tcp_ses->reconnect_mutex);
>  	memcpy(tcp_ses->workstation_RFC1001_name,
>  		volume_info->source_rfc1001_name,
> RFC1001_NAME_LEN_WITH_NULL);
>  	memcpy(tcp_ses->server_RFC1001_name,
> @@ -2182,6 +2199,9 @@ cifs_get_tcp_session(struct smb_vol
> *volume_info)
>  	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
>  	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
>  	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
> +#ifdef CONFIG_CIFS_SMB2
> +	INIT_DELAYED_WORK(&tcp_ses->reconnect,
> smb2_reconnect_server);
> +#endif
>  	memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
>  	       sizeof(tcp_ses->srcaddr));
>  	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
> @@ -2340,7 +2360,7 @@ cifs_put_smb_ses(struct cifs_ses *ses)
>  	spin_unlock(&cifs_tcp_ses_lock);
>  
>  	sesInfoFree(ses);
> -	cifs_put_tcp_session(server);
> +	cifs_put_tcp_session(server, 0);
>  }
>  
>  #ifdef CONFIG_KEYS
> @@ -2514,7 +2534,7 @@ cifs_get_smb_ses(struct TCP_Server_Info
> *server, struct smb_vol *volume_info)
>  		mutex_unlock(&ses->session_mutex);
>  
>  		/* existing SMB ses has a server reference already
> */
> -		cifs_put_tcp_session(server);
> +		cifs_put_tcp_session(server, 0);
>  		free_xid(xid);
>  		return ses;
>  	}
> @@ -2604,7 +2624,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char
> *unc)
>  	return NULL;
>  }
>  
> -static void
> +void
>  cifs_put_tcon(struct cifs_tcon *tcon)
>  {
>  	unsigned int xid;
> @@ -3792,7 +3812,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct
> smb_vol *volume_info)
>  		else if (ses)
>  			cifs_put_smb_ses(ses);
>  		else
> -			cifs_put_tcp_session(server);
> +			cifs_put_tcp_session(server, 0);
>  		bdi_destroy(&cifs_sb->bdi);
>  	}
>  
> @@ -4103,7 +4123,7 @@ cifs_construct_tcon(struct cifs_sb_info
> *cifs_sb, kuid_t fsuid)
>  	ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
>  	if (IS_ERR(ses)) {
>  		tcon = (struct cifs_tcon *)ses;
> -		cifs_put_tcp_session(master_tcon->ses->server);
> +		cifs_put_tcp_session(master_tcon->ses->server, 0);
>  		goto out;
>  	}
>  
> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
> index 5ca5ea46..950dbf7 100644
> --- a/fs/cifs/smb2pdu.c
> +++ b/fs/cifs/smb2pdu.c
> @@ -1972,6 +1972,54 @@ smb2_echo_callback(struct mid_q_entry *mid)
>  	add_credits(server, credits_received, CIFS_ECHO_OP);
>  }
>  
> +void smb2_reconnect_server(struct work_struct *work)
> +{
> +	struct TCP_Server_Info *server = container_of(work,
> +					struct TCP_Server_Info,
> reconnect.work);
> +	struct cifs_ses *ses;
> +	struct cifs_tcon *tcon, *tcon2;
> +	struct list_head tmp_list;
> +	int tcon_exist = false;
> +
> +	/* Prevent simultaneous reconnects that can corrupt tcon-
> >rlist list */
> +	mutex_lock(&server->reconnect_mutex);
> +
> +	INIT_LIST_HEAD(&tmp_list);
> +	cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
> +
> +	spin_lock(&cifs_tcp_ses_lock);
> +	list_for_each_entry(ses, &server->smb_ses_list,
> smb_ses_list) {
> +		list_for_each_entry(tcon, &ses->tcon_list,
> tcon_list) {
> +			if (tcon && tcon->need_reconnect) {
> +				tcon->tc_count++;
> +				list_add_tail(&tcon->rlist,
> &tmp_list);
> +				tcon_exist = true;
> +			}
> +		}
> +	}
> +	/*
> +	 * Get the reference to server struct to be sure that the
> last call of
> +	 * cifs_put_tcon() in the loop below won't release the
> server pointer.
> +	 */
> +	if (tcon_exist)
> +		server->srv_count++;
> +
> +	spin_unlock(&cifs_tcp_ses_lock);
> +
> +	list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
> +		smb2_reconnect(SMB2_ECHO, tcon);
> +		list_del_init(&tcon->rlist);
> +		cifs_put_tcon(tcon);
> +	}
> +
> +	cifs_dbg(FYI, "Reconnecting tcons finished\n");
> +	mutex_unlock(&server->reconnect_mutex);
> +
> +	/* now we can safely release srv struct */
> +	if (tcon_exist)
> +		cifs_put_tcp_session(server, 1);
> +}
> +
>  int
>  SMB2_echo(struct TCP_Server_Info *server)
>  {
> @@ -1984,32 +2032,11 @@ SMB2_echo(struct TCP_Server_Info *server)
>  	cifs_dbg(FYI, "In echo request\n");
>  
>  	if (server->tcpStatus == CifsNeedNegotiate) {
> -		struct list_head *tmp, *tmp2;
> -		struct cifs_ses *ses;
> -		struct cifs_tcon *tcon;
> -
> -		cifs_dbg(FYI, "Need negotiate, reconnecting
> tcons\n");
> -		spin_lock(&cifs_tcp_ses_lock);
> -		list_for_each(tmp, &server->smb_ses_list) {
> -			ses = list_entry(tmp, struct cifs_ses,
> smb_ses_list);
> -			list_for_each(tmp2, &ses->tcon_list) {
> -				tcon = list_entry(tmp2, struct
> cifs_tcon,
> -						  tcon_list);
> -				/* add check for persistent handle
> reconnect */
> -				if (tcon && tcon->need_reconnect) {
> -					spin_unlock(&cifs_tcp_ses_lo
> ck);
> -					rc =
> smb2_reconnect(SMB2_ECHO, tcon);
> -					spin_lock(&cifs_tcp_ses_lock
> );
> -				}
> -			}
> -		}
> -		spin_unlock(&cifs_tcp_ses_lock);
> +		/* No need to send echo on newly established
> connections */
> +		mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
> +		return rc;

Shouldn't this be queue_work?


>  	}
>  
> -	/* if no session, renegotiate failed above */
> -	if (server->tcpStatus == CifsNeedNegotiate)
> -		return -EIO;
> -
>  	rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
>  	if (rc)
>  		return rc;
> diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
> index eb2cde2..f2d511a 100644
> --- a/fs/cifs/smb2proto.h
> +++ b/fs/cifs/smb2proto.h
> @@ -96,6 +96,7 @@ extern int smb2_open_file(const unsigned int xid,
>  extern int smb2_unlock_range(struct cifsFileInfo *cfile,
>  			     struct file_lock *flock, const unsigned
> int xid);
>  extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
> +extern void smb2_reconnect_server(struct work_struct *work);
>  
>  /*
>   * SMB2 Worker functions - most of protocol specific implementation
> details

Sachin Prabhu
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Nov. 30, 2016, 7:24 p.m. UTC | #5
2016-11-30 2:00 GMT-08:00 Sachin Prabhu <sprabhu@redhat.com>:
> On Thu, 2016-11-10 at 15:31 -0800, Pavel Shilovsky wrote:
>> We can not unlock/lock cifs_tcp_ses_lock while walking through ses
>> and tcon lists because it can corrupt list iterator pointers and
>> a tcon structure can be released if we don't hold an extra reference.
>> Fix it by moving a reconnect process to a separate delayed work
>> and acquiring a reference to every tcon that needs to be reconnected.
>> Also do not send an echo request on newly established connections.
>>
>> CC: Stable <stable@vger.kernel.org>
>> Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
>> ---
>>  fs/cifs/cifsglob.h  |  3 +++
>>  fs/cifs/cifsproto.h |  3 +++
>>  fs/cifs/connect.c   | 34 +++++++++++++++++++-----
>>  fs/cifs/smb2pdu.c   | 75 ++++++++++++++++++++++++++++++++++++-------
>> ----------
>>  fs/cifs/smb2proto.h |  1 +
>>  5 files changed, 85 insertions(+), 31 deletions(-)
>>
>> diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
>> index 1f17f6b..6dcefc8 100644
>> --- a/fs/cifs/cifsglob.h
>> +++ b/fs/cifs/cifsglob.h
>> @@ -632,6 +632,7 @@ struct TCP_Server_Info {
>>       bool    sec_mskerberos;         /* supports
>> legacy MS Kerberos */
>>       bool    large_buf;              /* is current buffer
>> large? */
>>       struct delayed_work     echo; /* echo ping workqueue job
>> */
>> +     struct delayed_work     reconnect; /* reconnect workqueue
>> job */
>>       char    *smallbuf;      /* pointer to current "small"
>> buffer */
>>       char    *bigbuf;        /* pointer to current "big"
>> buffer */
>>       unsigned int total_read; /* total amount of data read in
>> this pass */
>> @@ -648,6 +649,7 @@ struct TCP_Server_Info {
>>       __u8            preauth_hash[512];
>>  #endif /* CONFIG_CIFS_SMB2 */
>>       unsigned long echo_interval;
>> +     struct mutex reconnect_mutex; /* prevent simultaneous
>> reconnects */
>>  };
>>
>>  static inline unsigned int
>> @@ -850,6 +852,7 @@ struct cifs_tcon {
>>       struct list_head tcon_list;
>>       int tc_count;
>>       struct list_head openFileList;
>> +     struct list_head rlist; /* reconnect list */
>>       spinlock_t open_file_lock; /* protects list above */
>>       struct cifs_ses *ses;   /* pointer to session
>> associated with */
>>       char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in
>> ASCII */
>> diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
>> index ced0e42..cd8025a 100644
>> --- a/fs/cifs/cifsproto.h
>> +++ b/fs/cifs/cifsproto.h
>> @@ -206,6 +206,9 @@ extern void cifs_add_pending_open_locked(struct
>> cifs_fid *fid,
>>                                        struct tcon_link *tlink,
>>                                        struct cifs_pending_open
>> *open);
>>  extern void cifs_del_pending_open(struct cifs_pending_open *open);
>> +extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
>> +                              int from_reconnect);
>> +extern void cifs_put_tcon(struct cifs_tcon *tcon);
>>
>>  #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
>>  extern void cifs_dfs_release_automount_timer(void);
>> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
>> index 4547aed..75503af 100644
>> --- a/fs/cifs/connect.c
>> +++ b/fs/cifs/connect.c
>> @@ -52,6 +52,9 @@
>>  #include "nterr.h"
>>  #include "rfc1002pdu.h"
>>  #include "fscache.h"
>> +#ifdef CONFIG_CIFS_SMB2
>> +#include "smb2proto.h"
>> +#endif
>>
>>  #define CIFS_PORT 445
>>  #define RFC1001_PORT 139
>> @@ -2100,8 +2103,8 @@ cifs_find_tcp_session(struct smb_vol *vol)
>>       return NULL;
>>  }
>>
>> -static void
>> -cifs_put_tcp_session(struct TCP_Server_Info *server)
>> +void
>> +cifs_put_tcp_session(struct TCP_Server_Info *server, int
>> from_reconnect)
>>  {
>>       struct task_struct *task;
>>
>> @@ -2118,6 +2121,19 @@ cifs_put_tcp_session(struct TCP_Server_Info
>> *server)
>>
>>       cancel_delayed_work_sync(&server->echo);
>>
>> +#ifdef CONFIG_CIFS_SMB2
>> +     if (from_reconnect)
>> +             /*
>> +              * Avoid deadlock here: reconnect work calls
>> +              * cifs_put_tcp_session() at its end. Need to be
>> sure
>> +              * that reconnect work does nothing with server
>> pointer after
>> +              * that step.
>> +              */
>> +             cancel_delayed_work(&server->reconnect);
>> +     else
>> +             cancel_delayed_work_sync(&server->reconnect);
>> +#endif
>
> Do you need the #ifdef CONFIG_CIFS_SMB2 here considering that there are
> no such macro definitions around the declarations around delayed_work
> reconnect in struct TCP_Server_Info. It would also lead to unused
> variable in case the kernel is compiled without CONFIG_CIFS_SMB2.

Thank you for reviewing this.

I posted the next version of the patch yesterday ([PATCH 4/5] CIFS:
Fix a possible memory corruption during reconnect) that addresses this
problem by moving fields inside TCP_Server_Info to CONFIG_CIFS_SMB2.

>
>> +
>>       spin_lock(&GlobalMid_Lock);
>>       server->tcpStatus = CifsExiting;
>>       spin_unlock(&GlobalMid_Lock);
>> @@ -2171,6 +2187,7 @@ cifs_get_tcp_session(struct smb_vol
>> *volume_info)
>>       init_waitqueue_head(&tcp_ses->request_q);
>>       INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
>>       mutex_init(&tcp_ses->srv_mutex);
>> +     mutex_init(&tcp_ses->reconnect_mutex);
>>       memcpy(tcp_ses->workstation_RFC1001_name,
>>               volume_info->source_rfc1001_name,
>> RFC1001_NAME_LEN_WITH_NULL);
>>       memcpy(tcp_ses->server_RFC1001_name,
>> @@ -2182,6 +2199,9 @@ cifs_get_tcp_session(struct smb_vol
>> *volume_info)
>>       INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
>>       INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
>>       INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
>> +#ifdef CONFIG_CIFS_SMB2
>> +     INIT_DELAYED_WORK(&tcp_ses->reconnect,
>> smb2_reconnect_server);
>> +#endif
>>       memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
>>              sizeof(tcp_ses->srcaddr));
>>       memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
>> @@ -2340,7 +2360,7 @@ cifs_put_smb_ses(struct cifs_ses *ses)
>>       spin_unlock(&cifs_tcp_ses_lock);
>>
>>       sesInfoFree(ses);
>> -     cifs_put_tcp_session(server);
>> +     cifs_put_tcp_session(server, 0);
>>  }
>>
>>  #ifdef CONFIG_KEYS
>> @@ -2514,7 +2534,7 @@ cifs_get_smb_ses(struct TCP_Server_Info
>> *server, struct smb_vol *volume_info)
>>               mutex_unlock(&ses->session_mutex);
>>
>>               /* existing SMB ses has a server reference already
>> */
>> -             cifs_put_tcp_session(server);
>> +             cifs_put_tcp_session(server, 0);
>>               free_xid(xid);
>>               return ses;
>>       }
>> @@ -2604,7 +2624,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char
>> *unc)
>>       return NULL;
>>  }
>>
>> -static void
>> +void
>>  cifs_put_tcon(struct cifs_tcon *tcon)
>>  {
>>       unsigned int xid;
>> @@ -3792,7 +3812,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct
>> smb_vol *volume_info)
>>               else if (ses)
>>                       cifs_put_smb_ses(ses);
>>               else
>> -                     cifs_put_tcp_session(server);
>> +                     cifs_put_tcp_session(server, 0);
>>               bdi_destroy(&cifs_sb->bdi);
>>       }
>>
>> @@ -4103,7 +4123,7 @@ cifs_construct_tcon(struct cifs_sb_info
>> *cifs_sb, kuid_t fsuid)
>>       ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
>>       if (IS_ERR(ses)) {
>>               tcon = (struct cifs_tcon *)ses;
>> -             cifs_put_tcp_session(master_tcon->ses->server);
>> +             cifs_put_tcp_session(master_tcon->ses->server, 0);
>>               goto out;
>>       }
>>
>> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
>> index 5ca5ea46..950dbf7 100644
>> --- a/fs/cifs/smb2pdu.c
>> +++ b/fs/cifs/smb2pdu.c
>> @@ -1972,6 +1972,54 @@ smb2_echo_callback(struct mid_q_entry *mid)
>>       add_credits(server, credits_received, CIFS_ECHO_OP);
>>  }
>>
>> +void smb2_reconnect_server(struct work_struct *work)
>> +{
>> +     struct TCP_Server_Info *server = container_of(work,
>> +                                     struct TCP_Server_Info,
>> reconnect.work);
>> +     struct cifs_ses *ses;
>> +     struct cifs_tcon *tcon, *tcon2;
>> +     struct list_head tmp_list;
>> +     int tcon_exist = false;
>> +
>> +     /* Prevent simultaneous reconnects that can corrupt tcon-
>> >rlist list */
>> +     mutex_lock(&server->reconnect_mutex);
>> +
>> +     INIT_LIST_HEAD(&tmp_list);
>> +     cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
>> +
>> +     spin_lock(&cifs_tcp_ses_lock);
>> +     list_for_each_entry(ses, &server->smb_ses_list,
>> smb_ses_list) {
>> +             list_for_each_entry(tcon, &ses->tcon_list,
>> tcon_list) {
>> +                     if (tcon && tcon->need_reconnect) {
>> +                             tcon->tc_count++;
>> +                             list_add_tail(&tcon->rlist,
>> &tmp_list);
>> +                             tcon_exist = true;
>> +                     }
>> +             }
>> +     }
>> +     /*
>> +      * Get the reference to server struct to be sure that the
>> last call of
>> +      * cifs_put_tcon() in the loop below won't release the
>> server pointer.
>> +      */
>> +     if (tcon_exist)
>> +             server->srv_count++;
>> +
>> +     spin_unlock(&cifs_tcp_ses_lock);
>> +
>> +     list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
>> +             smb2_reconnect(SMB2_ECHO, tcon);
>> +             list_del_init(&tcon->rlist);
>> +             cifs_put_tcon(tcon);
>> +     }
>> +
>> +     cifs_dbg(FYI, "Reconnecting tcons finished\n");
>> +     mutex_unlock(&server->reconnect_mutex);
>> +
>> +     /* now we can safely release srv struct */
>> +     if (tcon_exist)
>> +             cifs_put_tcp_session(server, 1);
>> +}
>> +
>>  int
>>  SMB2_echo(struct TCP_Server_Info *server)
>>  {
>> @@ -1984,32 +2032,11 @@ SMB2_echo(struct TCP_Server_Info *server)
>>       cifs_dbg(FYI, "In echo request\n");
>>
>>       if (server->tcpStatus == CifsNeedNegotiate) {
>> -             struct list_head *tmp, *tmp2;
>> -             struct cifs_ses *ses;
>> -             struct cifs_tcon *tcon;
>> -
>> -             cifs_dbg(FYI, "Need negotiate, reconnecting
>> tcons\n");
>> -             spin_lock(&cifs_tcp_ses_lock);
>> -             list_for_each(tmp, &server->smb_ses_list) {
>> -                     ses = list_entry(tmp, struct cifs_ses,
>> smb_ses_list);
>> -                     list_for_each(tmp2, &ses->tcon_list) {
>> -                             tcon = list_entry(tmp2, struct
>> cifs_tcon,
>> -                                               tcon_list);
>> -                             /* add check for persistent handle
>> reconnect */
>> -                             if (tcon && tcon->need_reconnect) {
>> -                                     spin_unlock(&cifs_tcp_ses_lo
>> ck);
>> -                                     rc =
>> smb2_reconnect(SMB2_ECHO, tcon);
>> -                                     spin_lock(&cifs_tcp_ses_lock
>> );
>> -                             }
>> -                     }
>> -             }
>> -             spin_unlock(&cifs_tcp_ses_lock);
>> +             /* No need to send echo on newly established
>> connections */
>> +             mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
>> +             return rc;
>
> Shouldn't this be queue_work?

It is made it this way because of the following: if someone submitted
the work with a delay previously (now the code doesn't do it but it is
possible in the future), we want to overwrite this delay to 0 since we
need an immediate session/tcon reconnect here.

>>       }
>>
>> -     /* if no session, renegotiate failed above */
>> -     if (server->tcpStatus == CifsNeedNegotiate)
>> -             return -EIO;
>> -
>>       rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
>>       if (rc)
>>               return rc;
>> diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
>> index eb2cde2..f2d511a 100644
>> --- a/fs/cifs/smb2proto.h
>> +++ b/fs/cifs/smb2proto.h
>> @@ -96,6 +96,7 @@ extern int smb2_open_file(const unsigned int xid,
>>  extern int smb2_unlock_range(struct cifsFileInfo *cfile,
>>                            struct file_lock *flock, const unsigned
>> int xid);
>>  extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
>> +extern void smb2_reconnect_server(struct work_struct *work);
>>
>>  /*
>>   * SMB2 Worker functions - most of protocol specific implementation
>> details
>
> Sachin Prabhu
Sachin Prabhu Dec. 1, 2016, 9:35 a.m. UTC | #6
On Wed, 2016-11-30 at 11:24 -0800, Pavel Shilovsky wrote:

> > Do you need the #ifdef CONFIG_CIFS_SMB2 here considering that there
> > are
> > no such macro definitions around the declarations around
> > delayed_work
> > reconnect in struct TCP_Server_Info. It would also lead to unused
> > variable in case the kernel is compiled without CONFIG_CIFS_SMB2.
> 
> Thank you for reviewing this.
> 
> I posted the next version of the patch yesterday ([PATCH 4/5] CIFS:
> Fix a possible memory corruption during reconnect) that addresses
> this
> problem by moving fields inside TCP_Server_Info to CONFIG_CIFS_SMB2.
> 

Thanks. I'll review the patch separately.


> > > +             /* No need to send echo on newly established
> > > connections */
> > > +             mod_delayed_work(cifsiod_wq, &server->reconnect,
> > > 0);
> > > +             return rc;
> > 
> > Shouldn't this be queue_work?
> 
> It is made it this way because of the following: if someone submitted
> the work with a delay previously (now the code doesn't do it but it
> is
> possible in the future), we want to overwrite this delay to 0 since
> we
> need an immediate session/tcon reconnect here.


OK. It should work without any problem but I would have stuck to the
expected functions and use mod_delayed_work() only when new code which
queues the work separately has been added. It is strange to see only
mod_delayed_work() when the work isn't queued anywhere else. Apart from
that small concern, I am fine with it.

Sachin Prabhu
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Dec. 5, 2016, 9:39 p.m. UTC | #7
2016-12-01 1:35 GMT-08:00 Sachin Prabhu <sprabhu@redhat.com>:
> On Wed, 2016-11-30 at 11:24 -0800, Pavel Shilovsky wrote:
>>
>> > Do you need the #ifdef CONFIG_CIFS_SMB2 here considering that there
>> > are
>> > no such macro definitions around the declarations around
>> > delayed_work
>> > reconnect in struct TCP_Server_Info. It would also lead to unused
>> > variable in case the kernel is compiled without CONFIG_CIFS_SMB2.
>>
>> Thank you for reviewing this.
>>
>> I posted the next version of the patch yesterday ([PATCH 4/5] CIFS:
>> Fix a possible memory corruption during reconnect) that addresses
>> this
>> problem by moving fields inside TCP_Server_Info to CONFIG_CIFS_SMB2.
>>
>
> Thanks. I'll review the patch separately.
>
>
>> > > +             /* No need to send echo on newly established
>> > > connections */
>> > > +             mod_delayed_work(cifsiod_wq, &server->reconnect,
>> > > 0);
>> > > +             return rc;
>> >
>> > Shouldn't this be queue_work?
>>
>> It is made it this way because of the following: if someone submitted
>> the work with a delay previously (now the code doesn't do it but it
>> is
>> possible in the future), we want to overwrite this delay to 0 since
>> we
>> need an immediate session/tcon reconnect here.
>>
>
> OK. It should work without any problem but I would have stuck to the
> expected functions and use mod_delayed_work() only when new code which
> queues the work separately has been added. It is strange to see only
> mod_delayed_work() when the work isn't queued anywhere else. Apart from
> that small concern, I am fine with it.
>
> Sachin Prabhu

Ok, it makes sense. I have posted the next version (v2) of the
patchset that addresses your suggestions to the list.

Patch
diff mbox

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1f17f6b..6dcefc8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -632,6 +632,7 @@  struct TCP_Server_Info {
 	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
 	bool	large_buf;		/* is current buffer large? */
 	struct delayed_work	echo; /* echo ping workqueue job */
+	struct delayed_work	reconnect; /* reconnect workqueue job */
 	char	*smallbuf;	/* pointer to current "small" buffer */
 	char	*bigbuf;	/* pointer to current "big" buffer */
 	unsigned int total_read; /* total amount of data read in this pass */
@@ -648,6 +649,7 @@  struct TCP_Server_Info {
 	__u8		preauth_hash[512];
 #endif /* CONFIG_CIFS_SMB2 */
 	unsigned long echo_interval;
+	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
 };
 
 static inline unsigned int
@@ -850,6 +852,7 @@  struct cifs_tcon {
 	struct list_head tcon_list;
 	int tc_count;
 	struct list_head openFileList;
+	struct list_head rlist; /* reconnect list */
 	spinlock_t open_file_lock; /* protects list above */
 	struct cifs_ses *ses;	/* pointer to session associated with */
 	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ced0e42..cd8025a 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -206,6 +206,9 @@  extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
+extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
+				 int from_reconnect);
+extern void cifs_put_tcon(struct cifs_tcon *tcon);
 
 #if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
 extern void cifs_dfs_release_automount_timer(void);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4547aed..75503af 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -52,6 +52,9 @@ 
 #include "nterr.h"
 #include "rfc1002pdu.h"
 #include "fscache.h"
+#ifdef CONFIG_CIFS_SMB2
+#include "smb2proto.h"
+#endif
 
 #define CIFS_PORT 445
 #define RFC1001_PORT 139
@@ -2100,8 +2103,8 @@  cifs_find_tcp_session(struct smb_vol *vol)
 	return NULL;
 }
 
-static void
-cifs_put_tcp_session(struct TCP_Server_Info *server)
+void
+cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 {
 	struct task_struct *task;
 
@@ -2118,6 +2121,19 @@  cifs_put_tcp_session(struct TCP_Server_Info *server)
 
 	cancel_delayed_work_sync(&server->echo);
 
+#ifdef CONFIG_CIFS_SMB2
+	if (from_reconnect)
+		/*
+		 * Avoid deadlock here: reconnect work calls
+		 * cifs_put_tcp_session() at its end. Need to be sure
+		 * that reconnect work does nothing with server pointer after
+		 * that step.
+		 */
+		cancel_delayed_work(&server->reconnect);
+	else
+		cancel_delayed_work_sync(&server->reconnect);
+#endif
+
 	spin_lock(&GlobalMid_Lock);
 	server->tcpStatus = CifsExiting;
 	spin_unlock(&GlobalMid_Lock);
@@ -2171,6 +2187,7 @@  cifs_get_tcp_session(struct smb_vol *volume_info)
 	init_waitqueue_head(&tcp_ses->request_q);
 	INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
 	mutex_init(&tcp_ses->srv_mutex);
+	mutex_init(&tcp_ses->reconnect_mutex);
 	memcpy(tcp_ses->workstation_RFC1001_name,
 		volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
 	memcpy(tcp_ses->server_RFC1001_name,
@@ -2182,6 +2199,9 @@  cifs_get_tcp_session(struct smb_vol *volume_info)
 	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
 	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
 	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
+#ifdef CONFIG_CIFS_SMB2
+	INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
+#endif
 	memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
 	       sizeof(tcp_ses->srcaddr));
 	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
@@ -2340,7 +2360,7 @@  cifs_put_smb_ses(struct cifs_ses *ses)
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	sesInfoFree(ses);
-	cifs_put_tcp_session(server);
+	cifs_put_tcp_session(server, 0);
 }
 
 #ifdef CONFIG_KEYS
@@ -2514,7 +2534,7 @@  cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 		mutex_unlock(&ses->session_mutex);
 
 		/* existing SMB ses has a server reference already */
-		cifs_put_tcp_session(server);
+		cifs_put_tcp_session(server, 0);
 		free_xid(xid);
 		return ses;
 	}
@@ -2604,7 +2624,7 @@  cifs_find_tcon(struct cifs_ses *ses, const char *unc)
 	return NULL;
 }
 
-static void
+void
 cifs_put_tcon(struct cifs_tcon *tcon)
 {
 	unsigned int xid;
@@ -3792,7 +3812,7 @@  cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
 		else if (ses)
 			cifs_put_smb_ses(ses);
 		else
-			cifs_put_tcp_session(server);
+			cifs_put_tcp_session(server, 0);
 		bdi_destroy(&cifs_sb->bdi);
 	}
 
@@ -4103,7 +4123,7 @@  cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
 	ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
 	if (IS_ERR(ses)) {
 		tcon = (struct cifs_tcon *)ses;
-		cifs_put_tcp_session(master_tcon->ses->server);
+		cifs_put_tcp_session(master_tcon->ses->server, 0);
 		goto out;
 	}
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5ca5ea46..950dbf7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1972,6 +1972,54 @@  smb2_echo_callback(struct mid_q_entry *mid)
 	add_credits(server, credits_received, CIFS_ECHO_OP);
 }
 
+void smb2_reconnect_server(struct work_struct *work)
+{
+	struct TCP_Server_Info *server = container_of(work,
+					struct TCP_Server_Info, reconnect.work);
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon, *tcon2;
+	struct list_head tmp_list;
+	int tcon_exist = false;
+
+	/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
+	mutex_lock(&server->reconnect_mutex);
+
+	INIT_LIST_HEAD(&tmp_list);
+	cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+		list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+			if (tcon && tcon->need_reconnect) {
+				tcon->tc_count++;
+				list_add_tail(&tcon->rlist, &tmp_list);
+				tcon_exist = true;
+			}
+		}
+	}
+	/*
+	 * Get the reference to server struct to be sure that the last call of
+	 * cifs_put_tcon() in the loop below won't release the server pointer.
+	 */
+	if (tcon_exist)
+		server->srv_count++;
+
+	spin_unlock(&cifs_tcp_ses_lock);
+
+	list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
+		smb2_reconnect(SMB2_ECHO, tcon);
+		list_del_init(&tcon->rlist);
+		cifs_put_tcon(tcon);
+	}
+
+	cifs_dbg(FYI, "Reconnecting tcons finished\n");
+	mutex_unlock(&server->reconnect_mutex);
+
+	/* now we can safely release srv struct */
+	if (tcon_exist)
+		cifs_put_tcp_session(server, 1);
+}
+
 int
 SMB2_echo(struct TCP_Server_Info *server)
 {
@@ -1984,32 +2032,11 @@  SMB2_echo(struct TCP_Server_Info *server)
 	cifs_dbg(FYI, "In echo request\n");
 
 	if (server->tcpStatus == CifsNeedNegotiate) {
-		struct list_head *tmp, *tmp2;
-		struct cifs_ses *ses;
-		struct cifs_tcon *tcon;
-
-		cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
-		spin_lock(&cifs_tcp_ses_lock);
-		list_for_each(tmp, &server->smb_ses_list) {
-			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
-			list_for_each(tmp2, &ses->tcon_list) {
-				tcon = list_entry(tmp2, struct cifs_tcon,
-						  tcon_list);
-				/* add check for persistent handle reconnect */
-				if (tcon && tcon->need_reconnect) {
-					spin_unlock(&cifs_tcp_ses_lock);
-					rc = smb2_reconnect(SMB2_ECHO, tcon);
-					spin_lock(&cifs_tcp_ses_lock);
-				}
-			}
-		}
-		spin_unlock(&cifs_tcp_ses_lock);
+		/* No need to send echo on newly established connections */
+		mod_delayed_work(cifsiod_wq, &server->reconnect, 0);
+		return rc;
 	}
 
-	/* if no session, renegotiate failed above */
-	if (server->tcpStatus == CifsNeedNegotiate)
-		return -EIO;
-
 	rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
 	if (rc)
 		return rc;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index eb2cde2..f2d511a 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -96,6 +96,7 @@  extern int smb2_open_file(const unsigned int xid,
 extern int smb2_unlock_range(struct cifsFileInfo *cfile,
 			     struct file_lock *flock, const unsigned int xid);
 extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
+extern void smb2_reconnect_server(struct work_struct *work);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details