diff mbox

libiscsi: Fix host busy blocking during connection teardown

Message ID 1435108318-13625-1-git-send-email-sony.john@avagotech.com (mailing list archive)
State New, archived
Headers show

Commit Message

John Soni Jose June 24, 2015, 1:11 a.m. UTC
Issue:
 In case of hw iscsi offload, an host can have N-number of active
 connections. There can be IO's running on some connections which
 make host->host_busy always TRUE. Now if logout from a connection
 is tried then the code gets into an infinite loop as host->host_busy
 is always TRUE.

 iscsi_conn_teardown(....)
 {
   .........
    /*
     * Block until all in-progress commands for this connection
     * time out or fail.
     */
     for (;;) {
      spin_lock_irqsave(session->host->host_lock, flags);
      if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
	      spin_unlock_irqrestore(session->host->host_lock, flags);
              break;
      }
     spin_unlock_irqrestore(session->host->host_lock, flags);
     msleep_interruptible(500);
     iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
                 "host_busy %d host_failed %d\n",
	          atomic_read(&session->host->host_busy),
	          session->host->host_failed);

	................
	...............
     }
  }
  This is not an issue with software-iscsi/iser as each cxn is a separate
  host.

 Fix:
 Acquiring eh_mutex in iscsi_conn_teardown() before setting
 session->state = ISCSI_STATE_TERMINATE.

Signed-off-by: John Soni Jose <sony.john@aavagotech.com>
---
 drivers/scsi/libiscsi.c | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

Comments

Mike Christie July 2, 2015, 8:16 p.m. UTC | #1
On 6/23/15, 8:11 PM, John Soni Jose wrote:
>   Issue:
>   In case of hw iscsi offload, an host can have N-number of active
>   connections. There can be IO's running on some connections which
>   make host->host_busy always TRUE. Now if logout from a connection
>   is tried then the code gets into an infinite loop as host->host_busy
>   is always TRUE.
>
>   iscsi_conn_teardown(....)
>   {
>     .........
>      /*
>       * Block until all in-progress commands for this connection
>       * time out or fail.
>       */
>       for (;;) {
>        spin_lock_irqsave(session->host->host_lock, flags);
>        if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> 	      spin_unlock_irqrestore(session->host->host_lock, flags);
>                break;
>        }
>       spin_unlock_irqrestore(session->host->host_lock, flags);
>       msleep_interruptible(500);
>       iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
>                   "host_busy %d host_failed %d\n",
> 	          atomic_read(&session->host->host_busy),
> 	          session->host->host_failed);
>
> 	................
> 	...............
>       }
>    }
>    This is not an issue with software-iscsi/iser as each cxn is a separate
>    host.
>
>   Fix:
>   Acquiring eh_mutex in iscsi_conn_teardown() before setting
>   session->state = ISCSI_STATE_TERMINATE.
>
> Signed-off-by: John Soni Jose <sony.john@aavagotech.com>
> ---
>   drivers/scsi/libiscsi.c | 25 ++-----------------------
>   1 file changed, 2 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 8053f24..98d9bb6 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>   {
>   	struct iscsi_conn *conn = cls_conn->dd_data;
>   	struct iscsi_session *session = conn->session;
> -	unsigned long flags;
>
>   	del_timer_sync(&conn->transport_timer);
>
> +	mutex_lock(&session->eh_mutex);
>   	spin_lock_bh(&session->frwd_lock);
>   	conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
>   	if (session->leadconn == conn) {
> @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>   	}
>   	spin_unlock_bh(&session->frwd_lock);
>
> -	/*
> -	 * Block until all in-progress commands for this connection
> -	 * time out or fail.
> -	 */
> -	for (;;) {
> -		spin_lock_irqsave(session->host->host_lock, flags);
> -		if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> -			spin_unlock_irqrestore(session->host->host_lock, flags);
> -			break;
> -		}
> -		spin_unlock_irqrestore(session->host->host_lock, flags);
> -		msleep_interruptible(500);
> -		iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
> -				  "host_busy %d host_failed %d\n",
> -				  atomic_read(&session->host->host_busy),
> -				  session->host->host_failed);
> -		/*
> -		 * force eh_abort() to unblock
> -		 */
> -		wake_up(&conn->ehwait);
> -	}
> -
>   	/* flush queued up work because we free the connection below */
>   	iscsi_suspend_tx(conn);
>
> @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>   	if (session->leadconn == conn)
>   		session->leadconn = NULL;
>   	spin_unlock_bh(&session->frwd_lock);
> +	mutex_unlock(&session->eh_mutex);
>
>   	iscsi_destroy_conn(cls_conn);
>   }

It looks ok.

Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Leech July 8, 2015, 8:08 p.m. UTC | #2
On Tue, Jun 23, 2015 at 6:11 PM, John Soni Jose <sony.john@avagotech.com> wrote:
>
>  Issue:
>  In case of hw iscsi offload, an host can have N-number of active
>  connections. There can be IO's running on some connections which
>  make host->host_busy always TRUE. Now if logout from a connection
>  is tried then the code gets into an infinite loop as host->host_busy
>  is always TRUE.
>
>  iscsi_conn_teardown(....)
>  {
>    .........
>     /*
>      * Block until all in-progress commands for this connection
>      * time out or fail.
>      */
>      for (;;) {
>       spin_lock_irqsave(session->host->host_lock, flags);
>       if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
>               spin_unlock_irqrestore(session->host->host_lock, flags);
>               break;
>       }
>      spin_unlock_irqrestore(session->host->host_lock, flags);
>      msleep_interruptible(500);
>      iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
>                  "host_busy %d host_failed %d\n",
>                   atomic_read(&session->host->host_busy),
>                   session->host->host_failed);
>
>         ................
>         ...............
>      }
>   }
>   This is not an issue with software-iscsi/iser as each cxn is a separate
>   host.
>
>  Fix:
>  Acquiring eh_mutex in iscsi_conn_teardown() before setting
>  session->state = ISCSI_STATE_TERMINATE.
>
> Signed-off-by: John Soni Jose <sony.john@aavagotech.com>
> ---
>  drivers/scsi/libiscsi.c | 25 ++-----------------------
>  1 file changed, 2 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 8053f24..98d9bb6 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>  {
>         struct iscsi_conn *conn = cls_conn->dd_data;
>         struct iscsi_session *session = conn->session;
> -       unsigned long flags;
>
>         del_timer_sync(&conn->transport_timer);
>
> +       mutex_lock(&session->eh_mutex);
>         spin_lock_bh(&session->frwd_lock);
>         conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
>         if (session->leadconn == conn) {
> @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>         }
>         spin_unlock_bh(&session->frwd_lock);
>
> -       /*
> -        * Block until all in-progress commands for this connection
> -        * time out or fail.
> -        */
> -       for (;;) {
> -               spin_lock_irqsave(session->host->host_lock, flags);
> -               if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> -                       spin_unlock_irqrestore(session->host->host_lock, flags);
> -                       break;
> -               }
> -               spin_unlock_irqrestore(session->host->host_lock, flags);
> -               msleep_interruptible(500);
> -               iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
> -                                 "host_busy %d host_failed %d\n",
> -                                 atomic_read(&session->host->host_busy),
> -                                 session->host->host_failed);
> -               /*
> -                * force eh_abort() to unblock
> -                */
> -               wake_up(&conn->ehwait);
> -       }
> -
>         /* flush queued up work because we free the connection below */
>         iscsi_suspend_tx(conn);
>
> @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>         if (session->leadconn == conn)
>                 session->leadconn = NULL;
>         spin_unlock_bh(&session->frwd_lock);
> +       mutex_unlock(&session->eh_mutex);
>
>         iscsi_destroy_conn(cls_conn);
>  }
> --


Looks good to me, solid reasoning on why host_busy is the wrong thing to check.

Reviewed-by: Chris Leech <cleech@redhat.com>
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Leech Aug. 12, 2015, 5:10 p.m. UTC | #3
Bump.

We've done some testing, and this does prevent a deadlock that can be
triggered with a session logout while other sessions on the same HBA
are under high IO load.  I'd be nice to see this merged.

- Chris

On Tue, Jun 23, 2015 at 6:11 PM, John Soni Jose <sony.john@avagotech.com> wrote:
>  Issue:
>  In case of hw iscsi offload, an host can have N-number of active
>  connections. There can be IO's running on some connections which
>  make host->host_busy always TRUE. Now if logout from a connection
>  is tried then the code gets into an infinite loop as host->host_busy
>  is always TRUE.
>
>  iscsi_conn_teardown(....)
>  {
>    .........
>     /*
>      * Block until all in-progress commands for this connection
>      * time out or fail.
>      */
>      for (;;) {
>       spin_lock_irqsave(session->host->host_lock, flags);
>       if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
>               spin_unlock_irqrestore(session->host->host_lock, flags);
>               break;
>       }
>      spin_unlock_irqrestore(session->host->host_lock, flags);
>      msleep_interruptible(500);
>      iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
>                  "host_busy %d host_failed %d\n",
>                   atomic_read(&session->host->host_busy),
>                   session->host->host_failed);
>
>         ................
>         ...............
>      }
>   }
>   This is not an issue with software-iscsi/iser as each cxn is a separate
>   host.
>
>  Fix:
>  Acquiring eh_mutex in iscsi_conn_teardown() before setting
>  session->state = ISCSI_STATE_TERMINATE.
>
> Signed-off-by: John Soni Jose <sony.john@aavagotech.com>
> ---
>  drivers/scsi/libiscsi.c | 25 ++-----------------------
>  1 file changed, 2 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 8053f24..98d9bb6 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>  {
>         struct iscsi_conn *conn = cls_conn->dd_data;
>         struct iscsi_session *session = conn->session;
> -       unsigned long flags;
>
>         del_timer_sync(&conn->transport_timer);
>
> +       mutex_lock(&session->eh_mutex);
>         spin_lock_bh(&session->frwd_lock);
>         conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
>         if (session->leadconn == conn) {
> @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>         }
>         spin_unlock_bh(&session->frwd_lock);
>
> -       /*
> -        * Block until all in-progress commands for this connection
> -        * time out or fail.
> -        */
> -       for (;;) {
> -               spin_lock_irqsave(session->host->host_lock, flags);
> -               if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> -                       spin_unlock_irqrestore(session->host->host_lock, flags);
> -                       break;
> -               }
> -               spin_unlock_irqrestore(session->host->host_lock, flags);
> -               msleep_interruptible(500);
> -               iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
> -                                 "host_busy %d host_failed %d\n",
> -                                 atomic_read(&session->host->host_busy),
> -                                 session->host->host_failed);
> -               /*
> -                * force eh_abort() to unblock
> -                */
> -               wake_up(&conn->ehwait);
> -       }
> -
>         /* flush queued up work because we free the connection below */
>         iscsi_suspend_tx(conn);
>
> @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
>         if (session->leadconn == conn)
>                 session->leadconn = NULL;
>         spin_unlock_bh(&session->frwd_lock);
> +       mutex_unlock(&session->eh_mutex);
>
>         iscsi_destroy_conn(cls_conn);
>  }
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
James Bottomley Aug. 12, 2015, 5:14 p.m. UTC | #4
On Wed, 2015-08-12 at 10:10 -0700, Chris Leech wrote:
> Bump.
> 
> We've done some testing, and this does prevent a deadlock that can be
> triggered with a session logout while other sessions on the same HBA
> are under high IO load.  I'd be nice to see this merged.

so now it needs a tag for stable?

James

> - Chris
> 
> On Tue, Jun 23, 2015 at 6:11 PM, John Soni Jose <sony.john@avagotech.com> wrote:
> >  Issue:
> >  In case of hw iscsi offload, an host can have N-number of active
> >  connections. There can be IO's running on some connections which
> >  make host->host_busy always TRUE. Now if logout from a connection
> >  is tried then the code gets into an infinite loop as host->host_busy
> >  is always TRUE.
> >
> >  iscsi_conn_teardown(....)
> >  {
> >    .........
> >     /*
> >      * Block until all in-progress commands for this connection
> >      * time out or fail.
> >      */
> >      for (;;) {
> >       spin_lock_irqsave(session->host->host_lock, flags);
> >       if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> >               spin_unlock_irqrestore(session->host->host_lock, flags);
> >               break;
> >       }
> >      spin_unlock_irqrestore(session->host->host_lock, flags);
> >      msleep_interruptible(500);
> >      iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
> >                  "host_busy %d host_failed %d\n",
> >                   atomic_read(&session->host->host_busy),
> >                   session->host->host_failed);
> >
> >         ................
> >         ...............
> >      }
> >   }
> >   This is not an issue with software-iscsi/iser as each cxn is a separate
> >   host.
> >
> >  Fix:
> >  Acquiring eh_mutex in iscsi_conn_teardown() before setting
> >  session->state = ISCSI_STATE_TERMINATE.
> >
> > Signed-off-by: John Soni Jose <sony.john@aavagotech.com>
> > ---
> >  drivers/scsi/libiscsi.c | 25 ++-----------------------
> >  1 file changed, 2 insertions(+), 23 deletions(-)
> >
> > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> > index 8053f24..98d9bb6 100644
> > --- a/drivers/scsi/libiscsi.c
> > +++ b/drivers/scsi/libiscsi.c
> > @@ -2941,10 +2941,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
> >  {
> >         struct iscsi_conn *conn = cls_conn->dd_data;
> >         struct iscsi_session *session = conn->session;
> > -       unsigned long flags;
> >
> >         del_timer_sync(&conn->transport_timer);
> >
> > +       mutex_lock(&session->eh_mutex);
> >         spin_lock_bh(&session->frwd_lock);
> >         conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
> >         if (session->leadconn == conn) {
> > @@ -2956,28 +2956,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
> >         }
> >         spin_unlock_bh(&session->frwd_lock);
> >
> > -       /*
> > -        * Block until all in-progress commands for this connection
> > -        * time out or fail.
> > -        */
> > -       for (;;) {
> > -               spin_lock_irqsave(session->host->host_lock, flags);
> > -               if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
> > -                       spin_unlock_irqrestore(session->host->host_lock, flags);
> > -                       break;
> > -               }
> > -               spin_unlock_irqrestore(session->host->host_lock, flags);
> > -               msleep_interruptible(500);
> > -               iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
> > -                                 "host_busy %d host_failed %d\n",
> > -                                 atomic_read(&session->host->host_busy),
> > -                                 session->host->host_failed);
> > -               /*
> > -                * force eh_abort() to unblock
> > -                */
> > -               wake_up(&conn->ehwait);
> > -       }
> > -
> >         /* flush queued up work because we free the connection below */
> >         iscsi_suspend_tx(conn);
> >
> > @@ -2994,6 +2972,7 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
> >         if (session->leadconn == conn)
> >                 session->leadconn = NULL;
> >         spin_unlock_bh(&session->frwd_lock);
> > +       mutex_unlock(&session->eh_mutex);
> >
> >         iscsi_destroy_conn(cls_conn);
> >  }
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 



--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 8053f24..98d9bb6 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2941,10 +2941,10 @@  void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 {
 	struct iscsi_conn *conn = cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
-	unsigned long flags;
 
 	del_timer_sync(&conn->transport_timer);
 
+	mutex_lock(&session->eh_mutex);
 	spin_lock_bh(&session->frwd_lock);
 	conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
 	if (session->leadconn == conn) {
@@ -2956,28 +2956,6 @@  void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	}
 	spin_unlock_bh(&session->frwd_lock);
 
-	/*
-	 * Block until all in-progress commands for this connection
-	 * time out or fail.
-	 */
-	for (;;) {
-		spin_lock_irqsave(session->host->host_lock, flags);
-		if (!atomic_read(&session->host->host_busy)) { /* OK for ERL == 0 */
-			spin_unlock_irqrestore(session->host->host_lock, flags);
-			break;
-		}
-		spin_unlock_irqrestore(session->host->host_lock, flags);
-		msleep_interruptible(500);
-		iscsi_conn_printk(KERN_INFO, conn, "iscsi conn_destroy(): "
-				  "host_busy %d host_failed %d\n",
-				  atomic_read(&session->host->host_busy),
-				  session->host->host_failed);
-		/*
-		 * force eh_abort() to unblock
-		 */
-		wake_up(&conn->ehwait);
-	}
-
 	/* flush queued up work because we free the connection below */
 	iscsi_suspend_tx(conn);
 
@@ -2994,6 +2972,7 @@  void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
 	if (session->leadconn == conn)
 		session->leadconn = NULL;
 	spin_unlock_bh(&session->frwd_lock);
+	mutex_unlock(&session->eh_mutex);
 
 	iscsi_destroy_conn(cls_conn);
 }