diff mbox series

scsi: sd_zbc: update write pointer offset cache

Message ID 3cfebe48d09db73041b7849be71ffbcec7ee40b3.1615369586.git.johannes.thumshirn@wdc.com (mailing list archive)
State Accepted
Headers show
Series scsi: sd_zbc: update write pointer offset cache | expand

Commit Message

Johannes Thumshirn March 10, 2021, 9:48 a.m. UTC
Recent changes changed the completion of SCSI commands from Soft-IRQ
context to IRQ context. This triggers the following warning, when we're
completing writes to zoned block devices that go through the zone append
emulation:

 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
 RIP: 0010:__local_bh_disable_ip+0x3f/0x50
 RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
 RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
 RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
 RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
 R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
 R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
 FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
 Call Trace:
  <IRQ>
  _raw_spin_lock_bh+0x18/0x40
  sd_zbc_complete+0x43d/0x1150
  sd_done+0x631/0x1040
  ? mark_lock+0xe4/0x2fd0
  ? provisioning_mode_store+0x3f0/0x3f0
  scsi_finish_command+0x31b/0x5c0
  _scsih_io_done+0x960/0x29e0 [mpt3sas]
  ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
  ? __lock_acquire+0x166b/0x58b0
  ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
  _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
  ? lock_is_held_type+0x98/0x110
  ? find_held_lock+0x2c/0x110
  ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
  _base_interrupt+0x8d/0xd0 [mpt3sas]
  ? rcu_read_lock_sched_held+0x3f/0x70
  __handle_irq_event_percpu+0x24d/0x600
  handle_irq_event+0xef/0x240
  ? handle_irq_event_percpu+0x110/0x110
  handle_edge_irq+0x1f6/0xb60
  __common_interrupt+0x75/0x160
  common_interrupt+0x7b/0xa0
  </IRQ>
  asm_common_interrupt+0x1e/0x40

Don't use spin_lock_bh() to protect the update of the write pointer offset
cache, but use spin_lock_irqsave() for it.

Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 drivers/scsi/sd_zbc.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

Comments

Damien Le Moal March 10, 2021, 10:45 p.m. UTC | #1
On 2021/03/10 18:48, Johannes Thumshirn wrote:
> Recent changes changed the completion of SCSI commands from Soft-IRQ
> context to IRQ context. This triggers the following warning, when we're
> completing writes to zoned block devices that go through the zone append
> emulation:
> 
>  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>  Call Trace:
>   <IRQ>
>   _raw_spin_lock_bh+0x18/0x40
>   sd_zbc_complete+0x43d/0x1150
>   sd_done+0x631/0x1040
>   ? mark_lock+0xe4/0x2fd0
>   ? provisioning_mode_store+0x3f0/0x3f0
>   scsi_finish_command+0x31b/0x5c0
>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>   ? __lock_acquire+0x166b/0x58b0
>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>   ? lock_is_held_type+0x98/0x110
>   ? find_held_lock+0x2c/0x110
>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>   ? rcu_read_lock_sched_held+0x3f/0x70
>   __handle_irq_event_percpu+0x24d/0x600
>   handle_irq_event+0xef/0x240
>   ? handle_irq_event_percpu+0x110/0x110
>   handle_edge_irq+0x1f6/0xb60
>   __common_interrupt+0x75/0x160
>   common_interrupt+0x7b/0xa0
>   </IRQ>
>   asm_common_interrupt+0x1e/0x40
> 
> Don't use spin_lock_bh() to protect the update of the write pointer offset
> cache, but use spin_lock_irqsave() for it.
> 
> Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  drivers/scsi/sd_zbc.c | 19 +++++++++++--------
>  1 file changed, 11 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index ee558675eab4..994f1b8e3504 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -280,27 +280,28 @@ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
>  static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
>  {
>  	struct scsi_disk *sdkp;
> +	unsigned long flags;
>  	unsigned int zno;
>  	int ret;
>  
>  	sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
>  
> -	spin_lock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
>  	for (zno = 0; zno < sdkp->nr_zones; zno++) {
>  		if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
>  			continue;
>  
> -		spin_unlock_bh(&sdkp->zones_wp_offset_lock);
> +		spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
>  		ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
>  					     SD_BUF_SIZE,
>  					     zno * sdkp->zone_blocks, true);
> -		spin_lock_bh(&sdkp->zones_wp_offset_lock);
> +		spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
>  		if (!ret)
>  			sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
>  					    zno, sd_zbc_update_wp_offset_cb,
>  					    sdkp);
>  	}
> -	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
>  
>  	scsi_device_put(sdkp->device);
>  }
> @@ -324,6 +325,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
>  	struct request *rq = cmd->request;
>  	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
>  	unsigned int wp_offset, zno = blk_rq_zone_no(rq);
> +	unsigned long flags;
>  	blk_status_t ret;
>  
>  	ret = sd_zbc_cmnd_checks(cmd);
> @@ -337,7 +339,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
>  	if (!blk_req_zone_write_trylock(rq))
>  		return BLK_STS_ZONE_RESOURCE;
>  
> -	spin_lock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
>  	wp_offset = sdkp->zones_wp_offset[zno];
>  	switch (wp_offset) {
>  	case SD_ZBC_INVALID_WP_OFST:
> @@ -366,7 +368,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
>  
>  		*lba += wp_offset;
>  	}
> -	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
>  	if (ret)
>  		blk_req_zone_write_unlock(rq);
>  	return ret;
> @@ -445,6 +447,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
>  	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
>  	unsigned int zno = blk_rq_zone_no(rq);
>  	enum req_opf op = req_op(rq);
> +	unsigned long flags;
>  
>  	/*
>  	 * If we got an error for a command that needs updating the write
> @@ -452,7 +455,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
>  	 * invalid to force an update from disk the next time a zone append
>  	 * command is issued.
>  	 */
> -	spin_lock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
>  
>  	if (result && op != REQ_OP_ZONE_RESET_ALL) {
>  		if (op == REQ_OP_ZONE_APPEND) {
> @@ -496,7 +499,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
>  	}
>  
>  unlock_wp_offset:
> -	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
> +	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
>  
>  	return good_bytes;
>  }
> 

Looks good to me.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Shinichiro Kawasaki March 11, 2021, 3:14 a.m. UTC | #2
On Mar 10, 2021 / 18:48, Johannes Thumshirn wrote:
> Recent changes changed the completion of SCSI commands from Soft-IRQ
> context to IRQ context. This triggers the following warning, when we're
> completing writes to zoned block devices that go through the zone append
> emulation:
> 
>  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>  Call Trace:
>   <IRQ>
>   _raw_spin_lock_bh+0x18/0x40
>   sd_zbc_complete+0x43d/0x1150
>   sd_done+0x631/0x1040
>   ? mark_lock+0xe4/0x2fd0
>   ? provisioning_mode_store+0x3f0/0x3f0
>   scsi_finish_command+0x31b/0x5c0
>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>   ? __lock_acquire+0x166b/0x58b0
>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>   ? lock_is_held_type+0x98/0x110
>   ? find_held_lock+0x2c/0x110
>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>   ? rcu_read_lock_sched_held+0x3f/0x70
>   __handle_irq_event_percpu+0x24d/0x600
>   handle_irq_event+0xef/0x240
>   ? handle_irq_event_percpu+0x110/0x110
>   handle_edge_irq+0x1f6/0xb60
>   __common_interrupt+0x75/0x160
>   common_interrupt+0x7b/0xa0
>   </IRQ>
>   asm_common_interrupt+0x1e/0x40
> 
> Don't use spin_lock_bh() to protect the update of the write pointer offset
> cache, but use spin_lock_irqsave() for it.
> 
> Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>

I tested and confirmed that this patch fixes the WARNING. Thanks!

Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Bart Van Assche March 11, 2021, 3:13 p.m. UTC | #3
On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
> Recent changes [ ... ]

Please add Fixes: and/or Cc: stable tags as appropriate.

Thanks,

Bart.
Johannes Thumshirn March 11, 2021, 3:18 p.m. UTC | #4
On 11/03/2021 16:13, Bart Van Assche wrote:
> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>> Recent changes [ ... ]
> 
> Please add Fixes: and/or Cc: stable tags as appropriate.

I couldn't pin down the offending commit and I can't reproduce it locally
as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
Bart Van Assche March 11, 2021, 3:48 p.m. UTC | #5
On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
> On 11/03/2021 16:13, Bart Van Assche wrote:
>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>> Recent changes [ ... ]
>>
>> Please add Fixes: and/or Cc: stable tags as appropriate.
> 
> I couldn't pin down the offending commit and I can't reproduce it locally
> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.

That's weird. Did Shinichiro use a HBA? Could this be the result of a
behavior change in the HBA driver?

Thanks,

Bart.
Johannes Thumshirn March 11, 2021, 3:54 p.m. UTC | #6
On 11/03/2021 16:48, Bart Van Assche wrote:
> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>> Recent changes [ ... ]
>>>
>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>
>> I couldn't pin down the offending commit and I can't reproduce it locally
>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
> 
> That's weird. Did Shinichiro use a HBA? Could this be the result of a
> behavior change in the HBA driver?

Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
host tagset for CPU hotplug") is the only one that /looks/ as if it could
be causing it, but I don't know mpt3sas well enough.

FWIW added Sreekanth
Bart Van Assche March 12, 2021, 2:56 a.m. UTC | #7
On 3/11/21 7:54 AM, Johannes Thumshirn wrote:
> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
> host tagset for CPU hotplug") is the only one that /looks/ as if it could
> be causing it, but I don't know mpt3sas well enough.
> 
> FWIW added Sreekanth

Hi Johannes,

I'd like to see a full root-cause analysis before proceeding with this
patch because as far as I can see the call chain for
scsi_finish_command() is as follows (entirely in soft-IRQ context):

blk_done_softirq()
  blk_complete_reqs()
    scsi_softirq_done()
      scsi_finish_command()

Thanks,

Bart.
Shinichiro Kawasaki March 12, 2021, 4:38 a.m. UTC | #8
On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
> On 11/03/2021 16:48, Bart Van Assche wrote:
> > On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
> >> On 11/03/2021 16:13, Bart Van Assche wrote:
> >>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
> >>>> Recent changes [ ... ]
> >>>
> >>> Please add Fixes: and/or Cc: stable tags as appropriate.
> >>
> >> I couldn't pin down the offending commit and I can't reproduce it locally
> >> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
> > 
> > That's weird. Did Shinichiro use a HBA? Could this be the result of a
> > behavior change in the HBA driver?
> 
> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
> host tagset for CPU hotplug") is the only one that /looks/ as if it could
> be causing it, but I don't know mpt3sas well enough.
> 
> FWIW added Sreekanth

The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
Broadcom 9400. It can be recreated by running blktests block/004 on the drive
(after reboot). It is also recreated with SATA SMR drive with the HBA, but not
observed with SATA drives connected to AHCI.

I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
it indicates that the commit changed HBA driver behavior.
Johannes Thumshirn March 12, 2021, 5:51 a.m. UTC | #9
On 12/03/2021 05:38, Shinichiro Kawasaki wrote:
> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
> observed with SATA drives connected to AHCI.
> 
> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
> it indicates that the commit changed HBA driver behavior.


Alright then it's:
Fixes: 664f0dce2058 ("scsi: mpt3sas: Add support for shared host tagset for CPU hotplug")

Martin, can you pick it up for the fixes queue?
Damien Le Moal March 12, 2021, 7:27 a.m. UTC | #10
On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
>> On 11/03/2021 16:48, Bart Van Assche wrote:
>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>>>> Recent changes [ ... ]
>>>>>
>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>>>
>>>> I couldn't pin down the offending commit and I can't reproduce it locally
>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
>>>
>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
>>> behavior change in the HBA driver?
>>
>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
>> be causing it, but I don't know mpt3sas well enough.
>>
>> FWIW added Sreekanth
> 
> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
> observed with SATA drives connected to AHCI.
> 
> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
> it indicates that the commit changed HBA driver behavior.

Can you send the warning splat with backtrace ?
Johannes Thumshirn March 12, 2021, 7:59 a.m. UTC | #11
On 12/03/2021 08:27, Damien Le Moal wrote:
> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
>>> On 11/03/2021 16:48, Bart Van Assche wrote:
>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>>>>> Recent changes [ ... ]
>>>>>>
>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>>>>
>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
>>>>
>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
>>>> behavior change in the HBA driver?
>>>
>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
>>> be causing it, but I don't know mpt3sas well enough.
>>>
>>> FWIW added Sreekanth
>>
>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
>> observed with SATA drives connected to AHCI.
>>
>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
>> it indicates that the commit changed HBA driver behavior.
> 
> Can you send the warning splat with backtrace ?
> 

The warning splat is in the commit message:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
 RIP: 0010:__local_bh_disable_ip+0x3f/0x50
 RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
 RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
 RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
 RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
 R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
 R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
 FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
 Call Trace:
  <IRQ>
  _raw_spin_lock_bh+0x18/0x40
  sd_zbc_complete+0x43d/0x1150
  sd_done+0x631/0x1040
  ? mark_lock+0xe4/0x2fd0
  ? provisioning_mode_store+0x3f0/0x3f0
  scsi_finish_command+0x31b/0x5c0
  _scsih_io_done+0x960/0x29e0 [mpt3sas]
  ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
  ? __lock_acquire+0x166b/0x58b0
  ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
  _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
  ? lock_is_held_type+0x98/0x110
  ? find_held_lock+0x2c/0x110
  ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
  _base_interrupt+0x8d/0xd0 [mpt3sas]
  ? rcu_read_lock_sched_held+0x3f/0x70
  __handle_irq_event_percpu+0x24d/0x600
  handle_irq_event+0xef/0x240
  ? handle_irq_event_percpu+0x110/0x110
  handle_edge_irq+0x1f6/0xb60
  __common_interrupt+0x75/0x160
  common_interrupt+0x7b/0xa0
  </IRQ>
  asm_common_interrupt+0x1e/0x40
Damien Le Moal March 12, 2021, 8:20 a.m. UTC | #12
On 2021/03/12 16:59, Johannes Thumshirn wrote:
> On 12/03/2021 08:27, Damien Le Moal wrote:
>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
>>>> On 11/03/2021 16:48, Bart Van Assche wrote:
>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>>>>>> Recent changes [ ... ]
>>>>>>>
>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>>>>>
>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
>>>>>
>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
>>>>> behavior change in the HBA driver?
>>>>
>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
>>>> be causing it, but I don't know mpt3sas well enough.
>>>>
>>>> FWIW added Sreekanth
>>>
>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
>>> observed with SATA drives connected to AHCI.
>>>
>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
>>> it indicates that the commit changed HBA driver behavior.
>>
>> Can you send the warning splat with backtrace ?
>>
> 
> The warning splat is in the commit message:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>  Call Trace:
>   <IRQ>
>   _raw_spin_lock_bh+0x18/0x40
>   sd_zbc_complete+0x43d/0x1150
>   sd_done+0x631/0x1040
>   ? mark_lock+0xe4/0x2fd0
>   ? provisioning_mode_store+0x3f0/0x3f0
>   scsi_finish_command+0x31b/0x5c0
>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>   ? __lock_acquire+0x166b/0x58b0
>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>   ? lock_is_held_type+0x98/0x110
>   ? find_held_lock+0x2c/0x110
>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>   ? rcu_read_lock_sched_held+0x3f/0x70
>   __handle_irq_event_percpu+0x24d/0x600
>   handle_irq_event+0xef/0x240
>   ? handle_irq_event_percpu+0x110/0x110
>   handle_edge_irq+0x1f6/0xb60
>   __common_interrupt+0x75/0x160
>   common_interrupt+0x7b/0xa0
>   </IRQ>
>   asm_common_interrupt+0x1e/0x40
> 

Looking at patch 664f0dce2058, all that seems to be done is to enable
nr_hw_queue > 1. I do not see any change of locking context or irq handling.
From the backtrace, it does not look like scsi_finish_command() is called from
softirq... Probably a change in that area is responsible ?
Johannes Thumshirn March 12, 2021, 8:58 a.m. UTC | #13
On 12/03/2021 09:20, Damien Le Moal wrote:
> On 2021/03/12 16:59, Johannes Thumshirn wrote:
>> On 12/03/2021 08:27, Damien Le Moal wrote:
>>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
>>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
>>>>> On 11/03/2021 16:48, Bart Van Assche wrote:
>>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>>>>>>> Recent changes [ ... ]
>>>>>>>>
>>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>>>>>>
>>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
>>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
>>>>>>
>>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
>>>>>> behavior change in the HBA driver?
>>>>>
>>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
>>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
>>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
>>>>> be causing it, but I don't know mpt3sas well enough.
>>>>>
>>>>> FWIW added Sreekanth
>>>>
>>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
>>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
>>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
>>>> observed with SATA drives connected to AHCI.
>>>>
>>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
>>>> it indicates that the commit changed HBA driver behavior.
>>>
>>> Can you send the warning splat with backtrace ?
>>>
>>
>> The warning splat is in the commit message:
>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>>  Call Trace:
>>   <IRQ>
>>   _raw_spin_lock_bh+0x18/0x40
>>   sd_zbc_complete+0x43d/0x1150
>>   sd_done+0x631/0x1040
>>   ? mark_lock+0xe4/0x2fd0
>>   ? provisioning_mode_store+0x3f0/0x3f0
>>   scsi_finish_command+0x31b/0x5c0
>>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>>   ? __lock_acquire+0x166b/0x58b0
>>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>>   ? lock_is_held_type+0x98/0x110
>>   ? find_held_lock+0x2c/0x110
>>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>>   ? rcu_read_lock_sched_held+0x3f/0x70
>>   __handle_irq_event_percpu+0x24d/0x600
>>   handle_irq_event+0xef/0x240
>>   ? handle_irq_event_percpu+0x110/0x110
>>   handle_edge_irq+0x1f6/0xb60
>>   __common_interrupt+0x75/0x160
>>   common_interrupt+0x7b/0xa0
>>   </IRQ>
>>   asm_common_interrupt+0x1e/0x40
>>
> 
> Looking at patch 664f0dce2058, all that seems to be done is to enable
> nr_hw_queue > 1. I do not see any change of locking context or irq handling.
> From the backtrace, it does not look like scsi_finish_command() is called from
> softirq... Probably a change in that area is responsible ?
> 


In scsi_lib.c we only have these two patches in that area:

684da7628d93 ("block: remove unnecessary argument from blk_execute_rq")
962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h")

and none of them can cause the failure either. In block we have:

0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq")

but this doesn't look guilty as well, all it does is raising a softirq for all
block completions local and remote.
Shinichiro Kawasaki March 12, 2021, 10:05 a.m. UTC | #14
On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote:
> On 12/03/2021 09:20, Damien Le Moal wrote:
> > On 2021/03/12 16:59, Johannes Thumshirn wrote:
> >> On 12/03/2021 08:27, Damien Le Moal wrote:
> >>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
> >>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
> >>>>> On 11/03/2021 16:48, Bart Van Assche wrote:
> >>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
> >>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
> >>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
> >>>>>>>>> Recent changes [ ... ]
> >>>>>>>>
> >>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
> >>>>>>>
> >>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
> >>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
> >>>>>>
> >>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
> >>>>>> behavior change in the HBA driver?
> >>>>>
> >>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
> >>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
> >>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
> >>>>> be causing it, but I don't know mpt3sas well enough.
> >>>>>
> >>>>> FWIW added Sreekanth
> >>>>
> >>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
> >>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
> >>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
> >>>> observed with SATA drives connected to AHCI.
> >>>>
> >>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
> >>>> it indicates that the commit changed HBA driver behavior.
> >>>
> >>> Can you send the warning splat with backtrace ?
> >>>
> >>
> >> The warning splat is in the commit message:
> >> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
> >>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
> >>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
> >>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
> >>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
> >>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
> >>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
> >>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
> >>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
> >>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
> >>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
> >>  Call Trace:
> >>   <IRQ>
> >>   _raw_spin_lock_bh+0x18/0x40
> >>   sd_zbc_complete+0x43d/0x1150
> >>   sd_done+0x631/0x1040
> >>   ? mark_lock+0xe4/0x2fd0
> >>   ? provisioning_mode_store+0x3f0/0x3f0
> >>   scsi_finish_command+0x31b/0x5c0
> >>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
> >>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
> >>   ? __lock_acquire+0x166b/0x58b0
> >>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
> >>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
> >>   ? lock_is_held_type+0x98/0x110
> >>   ? find_held_lock+0x2c/0x110
> >>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
> >>   _base_interrupt+0x8d/0xd0 [mpt3sas]
> >>   ? rcu_read_lock_sched_held+0x3f/0x70
> >>   __handle_irq_event_percpu+0x24d/0x600
> >>   handle_irq_event+0xef/0x240
> >>   ? handle_irq_event_percpu+0x110/0x110
> >>   handle_edge_irq+0x1f6/0xb60
> >>   __common_interrupt+0x75/0x160
> >>   common_interrupt+0x7b/0xa0
> >>   </IRQ>
> >>   asm_common_interrupt+0x1e/0x40
> >>
> > 
> > Looking at patch 664f0dce2058, all that seems to be done is to enable
> > nr_hw_queue > 1. I do not see any change of locking context or irq handling.
> > From the backtrace, it does not look like scsi_finish_command() is called from
> > softirq... Probably a change in that area is responsible ?
> > 
> 
> 
> In scsi_lib.c we only have these two patches in that area:
> 
> 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq")
> 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h")
> 
> and none of them can cause the failure either. In block we have:
> 
> 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq")
> 
> but this doesn't look guilty as well, all it does is raising a softirq for all
> block completions local and remote.

In blk_mq_complete_request_remote(), I found the following code.

	if (rq->q->nr_hw_queues == 1) {
		blk_mq_raise_softirq(rq);
		return true;
	}
	return false;

My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue
from zero to a value larger than 1 (with my test system, it is 8), it is
propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer
called.

The call stack I assume is as follows: without calling blk_mq_raise_softirq(),
there are all executed in IRQ context, probably.

  _scsih_io_done()
    scmd->scsi_done() = scsi_mq_done()
      blk_mq_complete_request()
        blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq()
        rq->q->mq_ops->complete() = scsi_soft_irq_done()
	  scsi_finish_command()
	    drv->done() = sd_done()

Will confirm this guess further.
Johannes Thumshirn March 12, 2021, 10:28 a.m. UTC | #15
On 12/03/2021 11:05, Shinichiro Kawasaki wrote:
> On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote:
>> On 12/03/2021 09:20, Damien Le Moal wrote:
>>> On 2021/03/12 16:59, Johannes Thumshirn wrote:
>>>> On 12/03/2021 08:27, Damien Le Moal wrote:
>>>>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
>>>>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
>>>>>>> On 11/03/2021 16:48, Bart Van Assche wrote:
>>>>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
>>>>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
>>>>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
>>>>>>>>>>> Recent changes [ ... ]
>>>>>>>>>>
>>>>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
>>>>>>>>>
>>>>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
>>>>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
>>>>>>>>
>>>>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
>>>>>>>> behavior change in the HBA driver?
>>>>>>>
>>>>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
>>>>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
>>>>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
>>>>>>> be causing it, but I don't know mpt3sas well enough.
>>>>>>>
>>>>>>> FWIW added Sreekanth
>>>>>>
>>>>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
>>>>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
>>>>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
>>>>>> observed with SATA drives connected to AHCI.
>>>>>>
>>>>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
>>>>>> it indicates that the commit changed HBA driver behavior.
>>>>>
>>>>> Can you send the warning splat with backtrace ?
>>>>>
>>>>
>>>> The warning splat is in the commit message:
>>>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>>>>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>>>>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>>>>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>>>>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>>>>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>>>>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>>>>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>>>>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>>>>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>>>>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>>>>  Call Trace:
>>>>   <IRQ>
>>>>   _raw_spin_lock_bh+0x18/0x40
>>>>   sd_zbc_complete+0x43d/0x1150
>>>>   sd_done+0x631/0x1040
>>>>   ? mark_lock+0xe4/0x2fd0
>>>>   ? provisioning_mode_store+0x3f0/0x3f0
>>>>   scsi_finish_command+0x31b/0x5c0
>>>>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>>>>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>>>>   ? __lock_acquire+0x166b/0x58b0
>>>>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>>>>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>>>>   ? lock_is_held_type+0x98/0x110
>>>>   ? find_held_lock+0x2c/0x110
>>>>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>>>>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>>>>   ? rcu_read_lock_sched_held+0x3f/0x70
>>>>   __handle_irq_event_percpu+0x24d/0x600
>>>>   handle_irq_event+0xef/0x240
>>>>   ? handle_irq_event_percpu+0x110/0x110
>>>>   handle_edge_irq+0x1f6/0xb60
>>>>   __common_interrupt+0x75/0x160
>>>>   common_interrupt+0x7b/0xa0
>>>>   </IRQ>
>>>>   asm_common_interrupt+0x1e/0x40
>>>>
>>>
>>> Looking at patch 664f0dce2058, all that seems to be done is to enable
>>> nr_hw_queue > 1. I do not see any change of locking context or irq handling.
>>> From the backtrace, it does not look like scsi_finish_command() is called from
>>> softirq... Probably a change in that area is responsible ?
>>>
>>
>>
>> In scsi_lib.c we only have these two patches in that area:
>>
>> 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq")
>> 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h")
>>
>> and none of them can cause the failure either. In block we have:
>>
>> 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq")
>>
>> but this doesn't look guilty as well, all it does is raising a softirq for all
>> block completions local and remote.
> 
> In blk_mq_complete_request_remote(), I found the following code.
> 
> 	if (rq->q->nr_hw_queues == 1) {
> 		blk_mq_raise_softirq(rq);
> 		return true;
> 	}
> 	return false;
> 
> My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue
> from zero to a value larger than 1 (with my test system, it is 8), it is
> propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer
> called.
> 
> The call stack I assume is as follows: without calling blk_mq_raise_softirq(),
> there are all executed in IRQ context, probably.
> 
>   _scsih_io_done()
>     scmd->scsi_done() = scsi_mq_done()
>       blk_mq_complete_request()
>         blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq()
>         rq->q->mq_ops->complete() = scsi_soft_irq_done()
> 	  scsi_finish_command()
> 	    drv->done() = sd_done()
> 
> Will confirm this guess further.
> 

But commit 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests 
in softirq") changed it to:

 
-       /*
-        * For most of single queue controllers, there is only one irq vector
-        * for handling I/O completion, and the only irq's affinity is set
-        * to all possible CPUs.  On most of ARCHs, this affinity means the irq
-        * is handled on one specific CPU.
-        *
-        * So complete I/O requests in softirq context in case of single queue
-        * devices to avoid degrading I/O performance due to irqsoff latency.
-        */
-       if (rq->q->nr_hw_queues == 1)
-               blk_mq_trigger_softirq(rq);
-       else
-               rq->q->mq_ops->complete(rq);
+       blk_mq_trigger_softirq(rq);
 }

So to my understanding, we will always complete in a softirq.
Shinichiro Kawasaki March 12, 2021, 11:48 a.m. UTC | #16
On Mar 12, 2021 / 10:28, Johannes Thumshirn wrote:
> On 12/03/2021 11:05, Shinichiro Kawasaki wrote:
> > On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote:
> >> On 12/03/2021 09:20, Damien Le Moal wrote:
> >>> On 2021/03/12 16:59, Johannes Thumshirn wrote:
> >>>> On 12/03/2021 08:27, Damien Le Moal wrote:
> >>>>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote:
> >>>>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote:
> >>>>>>> On 11/03/2021 16:48, Bart Van Assche wrote:
> >>>>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote:
> >>>>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote:
> >>>>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
> >>>>>>>>>>> Recent changes [ ... ]
> >>>>>>>>>>
> >>>>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate.
> >>>>>>>>>
> >>>>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally
> >>>>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
> >>>>>>>>
> >>>>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a
> >>>>>>>> behavior change in the HBA driver?
> >>>>>>>
> >>>>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the 
> >>>>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared 
> >>>>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could
> >>>>>>> be causing it, but I don't know mpt3sas well enough.
> >>>>>>>
> >>>>>>> FWIW added Sreekanth
> >>>>>>
> >>>>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA
> >>>>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive
> >>>>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not
> >>>>>> observed with SATA drives connected to AHCI.
> >>>>>>
> >>>>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose
> >>>>>> it indicates that the commit changed HBA driver behavior.
> >>>>>
> >>>>> Can you send the warning splat with backtrace ?
> >>>>>
> >>>>
> >>>> The warning splat is in the commit message:
> >>>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
> >>>>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
> >>>>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
> >>>>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
> >>>>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
> >>>>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
> >>>>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
> >>>>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
> >>>>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
> >>>>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
> >>>>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >>>>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
> >>>>  Call Trace:
> >>>>   <IRQ>
> >>>>   _raw_spin_lock_bh+0x18/0x40
> >>>>   sd_zbc_complete+0x43d/0x1150
> >>>>   sd_done+0x631/0x1040
> >>>>   ? mark_lock+0xe4/0x2fd0
> >>>>   ? provisioning_mode_store+0x3f0/0x3f0
> >>>>   scsi_finish_command+0x31b/0x5c0
> >>>>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
> >>>>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
> >>>>   ? __lock_acquire+0x166b/0x58b0
> >>>>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
> >>>>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
> >>>>   ? lock_is_held_type+0x98/0x110
> >>>>   ? find_held_lock+0x2c/0x110
> >>>>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
> >>>>   _base_interrupt+0x8d/0xd0 [mpt3sas]
> >>>>   ? rcu_read_lock_sched_held+0x3f/0x70
> >>>>   __handle_irq_event_percpu+0x24d/0x600
> >>>>   handle_irq_event+0xef/0x240
> >>>>   ? handle_irq_event_percpu+0x110/0x110
> >>>>   handle_edge_irq+0x1f6/0xb60
> >>>>   __common_interrupt+0x75/0x160
> >>>>   common_interrupt+0x7b/0xa0
> >>>>   </IRQ>
> >>>>   asm_common_interrupt+0x1e/0x40
> >>>>
> >>>
> >>> Looking at patch 664f0dce2058, all that seems to be done is to enable
> >>> nr_hw_queue > 1. I do not see any change of locking context or irq handling.
> >>> From the backtrace, it does not look like scsi_finish_command() is called from
> >>> softirq... Probably a change in that area is responsible ?
> >>>
> >>
> >>
> >> In scsi_lib.c we only have these two patches in that area:
> >>
> >> 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq")
> >> 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h")
> >>
> >> and none of them can cause the failure either. In block we have:
> >>
> >> 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq")
> >>
> >> but this doesn't look guilty as well, all it does is raising a softirq for all
> >> block completions local and remote.
> > 
> > In blk_mq_complete_request_remote(), I found the following code.
> > 
> > 	if (rq->q->nr_hw_queues == 1) {
> > 		blk_mq_raise_softirq(rq);
> > 		return true;
> > 	}
> > 	return false;
> > 
> > My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue
> > from zero to a value larger than 1 (with my test system, it is 8), it is
> > propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer
> > called.
> > 
> > The call stack I assume is as follows: without calling blk_mq_raise_softirq(),
> > there are all executed in IRQ context, probably.
> > 
> >   _scsih_io_done()
> >     scmd->scsi_done() = scsi_mq_done()
> >       blk_mq_complete_request()
> >         blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq()
> >         rq->q->mq_ops->complete() = scsi_soft_irq_done()
> > 	  scsi_finish_command()
> > 	    drv->done() = sd_done()
> > 
> > Will confirm this guess further.
> > 
> 
> But commit 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests 
> in softirq") changed it to:
> 
>  
> -       /*
> -        * For most of single queue controllers, there is only one irq vector
> -        * for handling I/O completion, and the only irq's affinity is set
> -        * to all possible CPUs.  On most of ARCHs, this affinity means the irq
> -        * is handled on one specific CPU.
> -        *
> -        * So complete I/O requests in softirq context in case of single queue
> -        * devices to avoid degrading I/O performance due to irqsoff latency.
> -        */
> -       if (rq->q->nr_hw_queues == 1)
> -               blk_mq_trigger_softirq(rq);
> -       else
> -               rq->q->mq_ops->complete(rq);
> +       blk_mq_trigger_softirq(rq);
>  }
> 
> So to my understanding, we will always complete in a softirq.
>

My understanding is the change above in __blk_mq_complete_request_remote() is
used for IPI (it is triggered in blk_mq_complete_send_ipi()). Let me quote
blk_mq_complete_request_remote() below (similar name but without underscores).
If blk_mq_complete_need_ipi(rq) returns false, blk_mq_complete_send_ipi(rq) is
not called. In this case, the commit 0a2efafbb1c7 does not affect.

bool blk_mq_complete_request_remote(struct request *rq)
{
	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);

	/*
	 * For a polled request, always complete locallly, it's pointless
	 * to redirect the completion.
	 */
	if (rq->cmd_flags & REQ_HIPRI)
		return false;

	if (blk_mq_complete_need_ipi(rq)) {
		blk_mq_complete_send_ipi(rq);
		return true;
	}

	if (rq->q->nr_hw_queues == 1) {
		blk_mq_raise_softirq(rq);
		return true;
	}
	return false;
}

With my test environment and some debug prints, I confirmed these two:

- The commit 664f0dce2058 changed q->nr_hw_queues value of drives on HBA
  from 1 to 8.
- The commit 664f0dce2058 changed the blk_mq_complete_request_remote()
  return value from true to false.
  This indicates that blk_mq_complete_need_ipi(rq) returns false.

So now I believe the commit 664f0dce2058 changed scsi_finish_command()
context from soft-IRQ to IRQ.
Johannes Thumshirn March 12, 2021, 12:02 p.m. UTC | #17
On 12/03/2021 12:48, Shinichiro Kawasaki wrote:
> So now I believe the commit 664f0dce2058 changed scsi_finish_command()
> context from soft-IRQ to IRQ.

That would fit you examination that a revert of 664f0dce2058 fixes the WARN.
Johannes Thumshirn March 16, 2021, 7:53 a.m. UTC | #18
Martin ping?
Martin K. Petersen March 17, 2021, 1:42 a.m. UTC | #19
Johannes,

> Martin ping?

When I originally read the thread I was left with the impression that we
were just papering over an underlying blk-mq issue. However, I read the
whole thing again and feel this fix is acceptable.
Johannes Thumshirn March 17, 2021, 7:45 a.m. UTC | #20
On 17/03/2021 02:42, Martin K. Petersen wrote:
> 
> Johannes,
> 
>> Martin ping?
> 
> When I originally read the thread I was left with the impression that we
> were just papering over an underlying blk-mq issue. However, I read the
> whole thing again and feel this fix is acceptable.
> 

Should I resend the patch with the Fixes line applied, or can you take it as is?
Martin K. Petersen March 19, 2021, 2:21 a.m. UTC | #21
Johannes,

> Should I resend the patch with the Fixes line applied, or can you take
> it as is?

It's already in scsi-fixes.
Martin K. Petersen March 19, 2021, 3:45 a.m. UTC | #22
On Wed, 10 Mar 2021 18:48:06 +0900, Johannes Thumshirn wrote:

> Recent changes changed the completion of SCSI commands from Soft-IRQ
> context to IRQ context. This triggers the following warning, when we're
> completing writes to zoned block devices that go through the zone append
> emulation:
> 
>  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2
>  Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015
>  RIP: 0010:__local_bh_disable_ip+0x3f/0x50
>  RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006
>  RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013
>  RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd
>  RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0
>  R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200
>  R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218
>  FS:  0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000
>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0
>  Call Trace:
>   <IRQ>
>   _raw_spin_lock_bh+0x18/0x40
>   sd_zbc_complete+0x43d/0x1150
>   sd_done+0x631/0x1040
>   ? mark_lock+0xe4/0x2fd0
>   ? provisioning_mode_store+0x3f0/0x3f0
>   scsi_finish_command+0x31b/0x5c0
>   _scsih_io_done+0x960/0x29e0 [mpt3sas]
>   ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas]
>   ? __lock_acquire+0x166b/0x58b0
>   ? _get_st_from_smid+0x4a/0x80 [mpt3sas]
>   _base_process_reply_queue+0x23f/0x26e0 [mpt3sas]
>   ? lock_is_held_type+0x98/0x110
>   ? find_held_lock+0x2c/0x110
>   ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas]
>   _base_interrupt+0x8d/0xd0 [mpt3sas]
>   ? rcu_read_lock_sched_held+0x3f/0x70
>   __handle_irq_event_percpu+0x24d/0x600
>   handle_irq_event+0xef/0x240
>   ? handle_irq_event_percpu+0x110/0x110
>   handle_edge_irq+0x1f6/0xb60
>   __common_interrupt+0x75/0x160
>   common_interrupt+0x7b/0xa0
>   </IRQ>
>   asm_common_interrupt+0x1e/0x40
> 
> [...]

Applied to 5.12/scsi-fixes, thanks!

[1/1] scsi: sd_zbc: update write pointer offset cache
      https://git.kernel.org/mkp/scsi/c/2db4215f4755
Johannes Thumshirn March 19, 2021, 8:15 a.m. UTC | #23
On 19/03/2021 04:46, Martin K. Petersen wrote:
> 
> Applied to 5.12/scsi-fixes, thanks!
> 
> [1/1] scsi: sd_zbc: update write pointer offset cache
>       https://git.kernel.org/mkp/scsi/c/2db4215f4755
> 

Thanks!
diff mbox series

Patch

diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index ee558675eab4..994f1b8e3504 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -280,27 +280,28 @@  static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
 static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
 {
 	struct scsi_disk *sdkp;
+	unsigned long flags;
 	unsigned int zno;
 	int ret;
 
 	sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
 
-	spin_lock_bh(&sdkp->zones_wp_offset_lock);
+	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
 	for (zno = 0; zno < sdkp->nr_zones; zno++) {
 		if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
 			continue;
 
-		spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+		spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 		ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
 					     SD_BUF_SIZE,
 					     zno * sdkp->zone_blocks, true);
-		spin_lock_bh(&sdkp->zones_wp_offset_lock);
+		spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
 		if (!ret)
 			sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
 					    zno, sd_zbc_update_wp_offset_cb,
 					    sdkp);
 	}
-	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 
 	scsi_device_put(sdkp->device);
 }
@@ -324,6 +325,7 @@  blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 	struct request *rq = cmd->request;
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	unsigned int wp_offset, zno = blk_rq_zone_no(rq);
+	unsigned long flags;
 	blk_status_t ret;
 
 	ret = sd_zbc_cmnd_checks(cmd);
@@ -337,7 +339,7 @@  blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 	if (!blk_req_zone_write_trylock(rq))
 		return BLK_STS_ZONE_RESOURCE;
 
-	spin_lock_bh(&sdkp->zones_wp_offset_lock);
+	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
 	wp_offset = sdkp->zones_wp_offset[zno];
 	switch (wp_offset) {
 	case SD_ZBC_INVALID_WP_OFST:
@@ -366,7 +368,7 @@  blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 
 		*lba += wp_offset;
 	}
-	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 	if (ret)
 		blk_req_zone_write_unlock(rq);
 	return ret;
@@ -445,6 +447,7 @@  static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	unsigned int zno = blk_rq_zone_no(rq);
 	enum req_opf op = req_op(rq);
+	unsigned long flags;
 
 	/*
 	 * If we got an error for a command that needs updating the write
@@ -452,7 +455,7 @@  static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 	 * invalid to force an update from disk the next time a zone append
 	 * command is issued.
 	 */
-	spin_lock_bh(&sdkp->zones_wp_offset_lock);
+	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
 
 	if (result && op != REQ_OP_ZONE_RESET_ALL) {
 		if (op == REQ_OP_ZONE_APPEND) {
@@ -496,7 +499,7 @@  static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
 	}
 
 unlock_wp_offset:
-	spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 
 	return good_bytes;
 }