[2/3] scsi: Do not rely on blk-mq for double completions

Message ID	20181114162601.11477-2-keith.busch@intel.com (mailing list archive)
State	Superseded
Headers	show Return-Path: <linux-scsi-owner@kernel.org> From: Keith Busch <keith.busch@intel.com> To: linux-scsi@vger.kernel.org, linux-block@vger.kernel.org Cc: Jens Axboe <axboe@kernel.dk>, Keith Busch <keith.busch@intel.com> Subject: [PATCH 2/3] scsi: Do not rely on blk-mq for double completions Date: Wed, 14 Nov 2018 09:26:00 -0700 Message-Id: <20181114162601.11477-2-keith.busch@intel.com> In-Reply-To: <20181114162601.11477-1-keith.busch@intel.com> References: <20181114162601.11477-1-keith.busch@intel.com> Sender: linux-scsi-owner@vger.kernel.org Precedence: bulk
Series	[1/3] blk-mq: Return true if request was completed | expand [1/3] blk-mq: Return true if request was completed [2/3] scsi: Do not rely on blk-mq for double completions [3/3] blk-mq: Simplify request completion state

Message ID

20181114162601.11477-2-keith.busch@intel.com (mailing list archive)

State

Superseded

Headers

From: Keith Busch <keith.busch@intel.com>
To: linux-scsi@vger.kernel.org, linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, Keith Busch <keith.busch@intel.com>
Subject: [PATCH 2/3] scsi: Do not rely on blk-mq for double completions
Date: Wed, 14 Nov 2018 09:26:00 -0700
Message-Id: <20181114162601.11477-2-keith.busch@intel.com>
In-Reply-To: <20181114162601.11477-1-keith.busch@intel.com>
References: <20181114162601.11477-1-keith.busch@intel.com>
Sender: linux-scsi-owner@vger.kernel.org
Precedence: bulk

Series

[1/3] blk-mq: Return true if request was completed | expand

Commit Message

Keith Busch Nov. 14, 2018, 4:26 p.m. UTC

The scsi timeout error handling had been directly updating the request
state to prevent a natural completion and error handling from completing
the same request twice. Fix this layering violation by having scsi
control the fate of its commands with scsi-owned flags rather than
using blk-mq's request state.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/scsi/scsi_error.c | 17 +++--------------
 drivers/scsi/scsi_lib.c   |  6 +++++-
 include/scsi/scsi_cmnd.h  |  5 ++++-
 3 files changed, 12 insertions(+), 16 deletions(-)

Comments

Bart Van Assche Nov. 14, 2018, 5:51 p.m. UTC | #1

On Wed, 2018-11-14 at 09:26 -0700, Keith Busch wrote:
> The scsi timeout error handling had been directly updating the request
> state to prevent a natural completion and error handling from completing
> the same request twice. Fix this layering violation by having scsi
> control the fate of its commands with scsi owned flags rather than
> use blk-mq's.
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  drivers/scsi/scsi_error.c | 17 +++--------------
>  drivers/scsi/scsi_lib.c   |  6 +++++-
>  include/scsi/scsi_cmnd.h  |  5 ++++-
>  3 files changed, 12 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index c736d61b1648..f89e829a1c51 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -199,6 +199,9 @@ scsi_abort_command(struct scsi_cmnd *scmd)
>  		return FAILED;
>  	}
>  
> +	if (test_and_set_bit(__SCMD_COMPLETE, &scmd->flags))
> +		return SUCCESS;
> +
>  	spin_lock_irqsave(shost->host_lock, flags);
>  	if (shost->eh_deadline != -1 && !shost->last_reset)
>  		shost->last_reset = jiffies;
> @@ -296,20 +299,6 @@ enum blk_eh_timer_return scsi_times_out(struct request
> *req)
>  		rtn = host->hostt->eh_timed_out(scmd);
>  
>  	if (rtn == BLK_EH_DONE) {
> -		/*
> -		 * For blk-mq, we must set the request state to complete
> now
> -		 * before sending the request to the scsi error handler.
> This
> -		 * will prevent a use-after-free in the event the LLD
> manages
> -		 * to complete the request before the error handler
> finishes
> -		 * processing this timed out request.
> -		 *
> -		 * If the request was already completed, then the LLD beat
> the
> -		 * time out handler from transferring the request to the
> scsi
> -		 * error handler. In that case we can return immediately
> as no
> -		 * further action is required.
> -		 */
> -		if (req->q->mq_ops && !blk_mq_mark_complete(req))
> -			return rtn;
>  		if (scsi_abort_command(scmd) != SUCCESS) {
>  			set_host_byte(scmd, DID_TIME_OUT);
>  			scsi_eh_scmd_add(scmd);
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index c7fccbb8f554..1e74137f1073 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -2044,8 +2044,11 @@ static int scsi_mq_prep_fn(struct request *req)
>  
>  static void scsi_mq_done(struct scsi_cmnd *cmd)
>  {
> +	if (test_and_set_bit(__SCMD_COMPLETE, &cmd->flags))
> +		return;
>  	trace_scsi_dispatch_cmd_done(cmd);
> -	blk_mq_complete_request(cmd->request);
> +	if (unlikely(!blk_mq_complete_request(cmd->request)))
> +		clear_bit(__SCMD_COMPLETE, &cmd->flags);
>  }
>  
>  static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
> @@ -2104,6 +2107,7 @@ static blk_status_t scsi_queue_rq(struct
> blk_mq_hw_ctx *hctx,
>  			goto out_dec_host_busy;
>  		req->rq_flags |= RQF_DONTPREP;
>  	} else {
> +		cmd->flags &= ~SCMD_COMPLETE;
>  		blk_mq_start_request(req);
>  	}


Hi Keith,

Please Cc Martin Petersen and the scsi mailing list for SCSI patches.

Regarding this patch: I think it introduces a subtle but severe bug
in the SCSI core: if an abort is processed concurrently with request
completion while "fake timeout" is enabled, the abort is ignored.

Bart.

Keith Busch Nov. 14, 2018, 6 p.m. UTC | #2

On Wed, Nov 14, 2018 at 09:51:36AM -0800, Bart Van Assche wrote:
> Regarding this patch: I think this patch introduces a subtle but severe bug
> in the SCSI core, namely that if an abort is processed concurrently with
> request completion with "fake timeout" enabled that the abort is ignored.

That requires the following to occur concurrently:

  1. A real completion
  2. A real timeout
  3. A fake timeout

That can't happen on a production kernel, and it is highly improbable
even with fake timeout injection enabled. We can still fix it by having
the scsi timeout handler return BLK_EH_RESET_TIMER in this case. I didn't
like adding code just to work around error injection, but there isn't a
good alternative at the moment.

Keith Busch Nov. 14, 2018, 6:10 p.m. UTC | #3

On Wed, Nov 14, 2018 at 11:00:18AM -0700, Keith Busch wrote:
> On Wed, Nov 14, 2018 at 09:51:36AM -0800, Bart Van Assche wrote:
> > Regarding this patch: I think this patch introduces a subtle but severe bug
> > in the SCSI core, namely that if an abort is processed concurrently with
> > request completion with "fake timeout" enabled that the abort is ignored.
> 
> That requires the following occur concurrently:
> 
>   1. A real completion
>   2. A real timeout
>   3. A fake timeout
> 
> That can't happen on a production kernel, and highly improbable
> on the fake one. We can still fix it by having scsi timeout return
> BLK_EH_RESET_TIMER in this case. I didn't like adding code just to work
> around error injection, but there isn't a good alternative at the moment.

So do this instead:

--8<---
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ff372b335ced..d343024e732a 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -199,9 +199,6 @@ scsi_abort_command(struct scsi_cmnd *scmd)
 		return FAILED;
 	}
 
-	if (test_and_set_bit(__SCMD_COMPLETE, &scmd->flags))
-		return SUCCESS;
-
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->eh_deadline != -1 && !shost->last_reset)
 		shost->last_reset = jiffies;
@@ -299,6 +296,8 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
 		rtn = host->hostt->eh_timed_out(scmd);
 
 	if (rtn == BLK_EH_DONE) {
+		if (test_and_set_bit(__SCMD_COMPLETE, &scmd->flags))
+			return BLK_EH_RESET_TIMER;
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
 			scsi_eh_scmd_add(scmd);
-->8---

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index c736d61b1648..f89e829a1c51 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -199,6 +199,9 @@  scsi_abort_command(struct scsi_cmnd *scmd)
 		return FAILED;
 	}
 
+	if (test_and_set_bit(__SCMD_COMPLETE, &scmd->flags))
+		return SUCCESS;
+
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (shost->eh_deadline != -1 && !shost->last_reset)
 		shost->last_reset = jiffies;
@@ -296,20 +299,6 @@  enum blk_eh_timer_return scsi_times_out(struct request *req)
 		rtn = host->hostt->eh_timed_out(scmd);
 
 	if (rtn == BLK_EH_DONE) {
-		/*
-		 * For blk-mq, we must set the request state to complete now
-		 * before sending the request to the scsi error handler. This
-		 * will prevent a use-after-free in the event the LLD manages
-		 * to complete the request before the error handler finishes
-		 * processing this timed out request.
-		 *
-		 * If the request was already completed, then the LLD beat the
-		 * time out handler from transferring the request to the scsi
-		 * error handler. In that case we can return immediately as no
-		 * further action is required.
-		 */
-		if (req->q->mq_ops && !blk_mq_mark_complete(req))
-			return rtn;
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
 			scsi_eh_scmd_add(scmd);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c7fccbb8f554..1e74137f1073 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2044,8 +2044,11 @@  static int scsi_mq_prep_fn(struct request *req)
 
 static void scsi_mq_done(struct scsi_cmnd *cmd)
 {
+	if (test_and_set_bit(__SCMD_COMPLETE, &cmd->flags))
+		return;
 	trace_scsi_dispatch_cmd_done(cmd);
-	blk_mq_complete_request(cmd->request);
+	if (unlikely(!blk_mq_complete_request(cmd->request)))
+		clear_bit(__SCMD_COMPLETE, &cmd->flags);
 }
 
 static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx)
@@ -2104,6 +2107,7 @@  static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_dec_host_busy;
 		req->rq_flags |= RQF_DONTPREP;
 	} else {
+		cmd->flags &= ~SCMD_COMPLETE;
 		blk_mq_start_request(req);
 	}
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index c891ada3c5c2..acef13c628d3 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -58,6 +58,9 @@  struct scsi_pointer {
 #define SCMD_TAGGED		(1 << 0)
 #define SCMD_UNCHECKED_ISA_DMA	(1 << 1)
 #define SCMD_INITIALIZED	(1 << 2)
+
+#define __SCMD_COMPLETE		3
+#define SCMD_COMPLETE		(1 << __SCMD_COMPLETE)
 /* flags preserved across unprep / reprep */
 #define SCMD_PRESERVED_FLAGS	(SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
 
@@ -144,7 +147,7 @@  struct scsi_cmnd {
 					 * to be at an address < 16Mb). */
 
 	int result;		/* Status code from lower level driver */
-	int flags;		/* Command flags */
+	unsigned long flags;	/* Command flags */
 
 	unsigned char tag;	/* SCSI-II queued command tag */
 };

[2/3] scsi: Do not rely on blk-mq for double completions

Commit Message

Comments

Patch