[v2] Avoid that ATA error handling can trigger a kernel hang or oops
diff mbox

Message ID 20180222193020.30276-1-bart.vanassche@wdc.com
State Accepted
Headers show

Commit Message

Bart Van Assche Feb. 22, 2018, 7:30 p.m. UTC
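Prevent the recently introduced call_rcu() call in the SCSI core from
triggering a double call_rcu() call. Do this by moving the rcu_head from
struct Scsi_Host into struct scsi_cmnd, so that scsi_eh_scmd_add() passes
the rcu_head of the command being added instead of the single rcu_head
that belonged to the host.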

Reported-by: Natanael Copa <ncopa@alpinelinux.org>
Reported-by: Damien Le Moal <damien.lemoal@wdc.com>
References: https://bugzilla.kernel.org/show_bug.cgi?id=198861
Fixes: 3bd6f43f5cb3 ("scsi: core: Ensure that the SCSI error handler gets woken up")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Natanael Copa <ncopa@alpinelinux.org>
Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Alexandre Oliva <oliva@gnu.org>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: <stable@vger.kernel.org>
---
 drivers/scsi/hosts.c      | 3 ---
 drivers/scsi/scsi_error.c | 5 +++--
 drivers/scsi/scsi_lib.c   | 2 ++
 include/scsi/scsi_cmnd.h  | 3 +++
 include/scsi/scsi_host.h  | 2 --
 5 files changed, 8 insertions(+), 7 deletions(-)

Comments

Natanael Copa Feb. 23, 2018, 3:46 p.m. UTC | #1
On Thu, 22 Feb 2018 11:30:20 -0800
Bart Van Assche <bart.vanassche@wdc.com> wrote:

> Avoid that the recently introduced call_rcu() call in the SCSI core
> triggers a double call_rcu() call.

This patch also prevents my machine from hanging. However, the ATA
error messages that I previously had are gone, so I don't know whether
that is because the underlying problem is gone or because the error
messages are simply no longer reported.

-nc

> 
> Reported-by: Natanael Copa <ncopa@alpinelinux.org>
> Reported-by: Damien Le Moal <damien.lemoal@wdc.com>
> References: https://bugzilla.kernel.org/show_bug.cgi?id=198861
> Fixes: 3bd6f43f5cb3 ("scsi: core: Ensure that the SCSI error handler gets woken up")
> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
> Cc: Natanael Copa <ncopa@alpinelinux.org>
> Cc: Damien Le Moal <damien.lemoal@wdc.com>
> Cc: Alexandre Oliva <oliva@gnu.org>
> Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Johannes Thumshirn <jthumshirn@suse.de>
> Cc: <stable@vger.kernel.org>
> ---
>  drivers/scsi/hosts.c      | 3 ---
>  drivers/scsi/scsi_error.c | 5 +++--
>  drivers/scsi/scsi_lib.c   | 2 ++
>  include/scsi/scsi_cmnd.h  | 3 +++
>  include/scsi/scsi_host.h  | 2 --
>  5 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
> index a0a7e4ff255c..7279d3d2e941 100644
> --- a/drivers/scsi/hosts.c
> +++ b/drivers/scsi/hosts.c
> @@ -334,8 +334,6 @@ static void scsi_host_dev_release(struct device *dev)
>  	if (shost->work_q)
>  		destroy_workqueue(shost->work_q);
>  
> -	destroy_rcu_head(&shost->rcu);
> -
>  	if (shost->shost_state == SHOST_CREATED) {
>  		/*
>  		 * Free the shost_dev device name here if scsi_host_alloc()
> @@ -404,7 +402,6 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
>  	INIT_LIST_HEAD(&shost->starved_list);
>  	init_waitqueue_head(&shost->host_wait);
>  	mutex_init(&shost->scan_mutex);
> -	init_rcu_head(&shost->rcu);
>  
>  	index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
>  	if (index < 0)
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 96f988a7efda..9b0242f84407 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -229,7 +229,8 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd)
>  
>  static void scsi_eh_inc_host_failed(struct rcu_head *head)
>  {
> -	struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
> +	struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
> +	struct Scsi_Host *shost = scmd->device->host;
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(shost->host_lock, flags);
> @@ -265,7 +266,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
>  	 * Ensure that all tasks observe the host state change before the
>  	 * host_failed change.
>  	 */
> -	call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
> +	call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
>  }
>  
>  /**
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index aea5a1ae318b..e1ca2160aa40 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -671,6 +671,7 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
>  	if (!blk_rq_is_scsi(req)) {
>  		WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
>  		cmd->flags &= ~SCMD_INITIALIZED;
> +		destroy_rcu_head(&cmd->rcu);
>  	}
>  
>  	if (req->mq_ctx) {
> @@ -1151,6 +1152,7 @@ static void scsi_initialize_rq(struct request *rq)
>  	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
>  
>  	scsi_req_init(&cmd->req);
> +	init_rcu_head(&cmd->rcu);
>  	cmd->jiffies_at_alloc = jiffies;
>  	cmd->retries = 0;
>  }
> diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
> index d8d4a902a88d..2280b2351739 100644
> --- a/include/scsi/scsi_cmnd.h
> +++ b/include/scsi/scsi_cmnd.h
> @@ -68,6 +68,9 @@ struct scsi_cmnd {
>  	struct list_head list;  /* scsi_cmnd participates in queue lists */
>  	struct list_head eh_entry; /* entry for the host eh_cmd_q */
>  	struct delayed_work abort_work;
> +
> +	struct rcu_head rcu;
> +
>  	int eh_eflags;		/* Used by error handlr */
>  
>  	/*
> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
> index 19317585ae48..7aedb6776b4f 100644
> --- a/include/scsi/scsi_host.h
> +++ b/include/scsi/scsi_host.h
> @@ -577,8 +577,6 @@ struct Scsi_Host {
>  		struct blk_mq_tag_set	tag_set;
>  	};
>  
> -	struct rcu_head rcu;
> -
>  	atomic_t host_busy;		   /* commands actually active on low-level */
>  	atomic_t host_blocked;
>
Salvatore Bonaccorso Feb. 26, 2018, 11:52 a.m. UTC | #2
Hi,

On Fri, Feb 23, 2018 at 04:46:30PM +0100, Natanael Copa wrote:
> On Thu, 22 Feb 2018 11:30:20 -0800
> Bart Van Assche <bart.vanassche@wdc.com> wrote:
> 
> > Avoid that the recently introduced call_rcu() call in the SCSI core
> > triggers a double call_rcu() call.
> 
> This patch also prevents my machine from hanging. However, the ATA
> error messages that I previously have had are gone, so I don't know if
> it is because the underlying problem is gone, or if the error messages
> are simply not reported.

For reference, the patch has also been confirmed to solve the
problem for Jean-Francois Pirus in https://bugs.debian.org/891467
(on top of 4.15.4).

Regards,
Salvatore
Bart Van Assche Feb. 27, 2018, 6:53 p.m. UTC | #3
On Thu, 2018-02-22 at 11:30 -0800, Bart Van Assche wrote:
> Avoid that the recently introduced call_rcu() call in the SCSI core
> triggers a double call_rcu() call.
> [ ... ]


Can anyone review this patch? Multiple users have confirmed independently
that this patch fixes the double call_rcu() issue for them.

Thanks,

Bart.
Damien Le Moal Feb. 27, 2018, 9:11 p.m. UTC | #4
On 2018/02/27 10:53, Bart Van Assche wrote:
> On Thu, 2018-02-22 at 11:30 -0800, Bart Van Assche wrote:
>> Avoid that the recently introduced call_rcu() call in the SCSI core
>> triggers a double call_rcu() call.
>> [ ... ]
> 
> Can anyone review this patch? Multiple users have confirmed independently
> that this patch fixes the double call_rcu() issue for them.
> 
> Thanks,
> 
> Bart.


Please feel free to add:

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>

Tested-by: Damien Le Moal <damien.lemoal@wdc.com>



-- 
Damien Le Moal
Western Digital Research
Martin K. Petersen Feb. 28, 2018, 2:14 a.m. UTC | #5
Bart,

> Avoid that the recently introduced call_rcu() call in the SCSI core
> triggers a double call_rcu() call.

Applied to 4.16/scsi-fixes. Thank you!

Patch
diff mbox

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index a0a7e4ff255c..7279d3d2e941 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -334,8 +334,6 @@  static void scsi_host_dev_release(struct device *dev)
 	if (shost->work_q)
 		destroy_workqueue(shost->work_q);
 
-	destroy_rcu_head(&shost->rcu);
-
 	if (shost->shost_state == SHOST_CREATED) {
 		/*
 		 * Free the shost_dev device name here if scsi_host_alloc()
@@ -404,7 +402,6 @@  struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	INIT_LIST_HEAD(&shost->starved_list);
 	init_waitqueue_head(&shost->host_wait);
 	mutex_init(&shost->scan_mutex);
-	init_rcu_head(&shost->rcu);
 
 	index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
 	if (index < 0)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 96f988a7efda..9b0242f84407 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -229,7 +229,8 @@  static void scsi_eh_reset(struct scsi_cmnd *scmd)
 
 static void scsi_eh_inc_host_failed(struct rcu_head *head)
 {
-	struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
+	struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
+	struct Scsi_Host *shost = scmd->device->host;
 	unsigned long flags;
 
 	spin_lock_irqsave(shost->host_lock, flags);
@@ -265,7 +266,7 @@  void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
 	 * Ensure that all tasks observe the host state change before the
 	 * host_failed change.
 	 */
-	call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
+	call_rcu(&scmd->rcu, scsi_eh_inc_host_failed);
 }
 
 /**
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index aea5a1ae318b..e1ca2160aa40 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -671,6 +671,7 @@  static bool scsi_end_request(struct request *req, blk_status_t error,
 	if (!blk_rq_is_scsi(req)) {
 		WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
 		cmd->flags &= ~SCMD_INITIALIZED;
+		destroy_rcu_head(&cmd->rcu);
 	}
 
 	if (req->mq_ctx) {
@@ -1151,6 +1152,7 @@  static void scsi_initialize_rq(struct request *rq)
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
 
 	scsi_req_init(&cmd->req);
+	init_rcu_head(&cmd->rcu);
 	cmd->jiffies_at_alloc = jiffies;
 	cmd->retries = 0;
 }
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index d8d4a902a88d..2280b2351739 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -68,6 +68,9 @@  struct scsi_cmnd {
 	struct list_head list;  /* scsi_cmnd participates in queue lists */
 	struct list_head eh_entry; /* entry for the host eh_cmd_q */
 	struct delayed_work abort_work;
+
+	struct rcu_head rcu;
+
 	int eh_eflags;		/* Used by error handlr */
 
 	/*
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 19317585ae48..7aedb6776b4f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -577,8 +577,6 @@  struct Scsi_Host {
 		struct blk_mq_tag_set	tag_set;
 	};
 
-	struct rcu_head rcu;
-
 	atomic_t host_busy;		   /* commands actually active on low-level */
 	atomic_t host_blocked;