Message ID | 3cfebe48d09db73041b7849be71ffbcec7ee40b3.1615369586.git.johannes.thumshirn@wdc.com (mailing list archive) |
---|---|
State | Accepted |
Headers | show |
Series | scsi: sd_zbc: update write pointer offset cache | expand |
On 2021/03/10 18:48, Johannes Thumshirn wrote: > Recent changes changed the completion of SCSI commands from Soft-IRQ > context to IRQ context. This triggers the following warning, when we're > completing writes to zoned block devices that go through the zone append > emulation: > > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > Call Trace: > <IRQ> > _raw_spin_lock_bh+0x18/0x40 > sd_zbc_complete+0x43d/0x1150 > sd_done+0x631/0x1040 > ? mark_lock+0xe4/0x2fd0 > ? provisioning_mode_store+0x3f0/0x3f0 > scsi_finish_command+0x31b/0x5c0 > _scsih_io_done+0x960/0x29e0 [mpt3sas] > ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > ? __lock_acquire+0x166b/0x58b0 > ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > ? lock_is_held_type+0x98/0x110 > ? find_held_lock+0x2c/0x110 > ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > _base_interrupt+0x8d/0xd0 [mpt3sas] > ? rcu_read_lock_sched_held+0x3f/0x70 > __handle_irq_event_percpu+0x24d/0x600 > handle_irq_event+0xef/0x240 > ? handle_irq_event_percpu+0x110/0x110 > handle_edge_irq+0x1f6/0xb60 > __common_interrupt+0x75/0x160 > common_interrupt+0x7b/0xa0 > </IRQ> > asm_common_interrupt+0x1e/0x40 > > Don't use spin_lock_bh() to protect the update of the write pointer offset > cache, but use spin_lock_irqsave() for it. > > Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> > --- > drivers/scsi/sd_zbc.c | 19 +++++++++++-------- > 1 file changed, 11 insertions(+), 8 deletions(-) > > diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c > index ee558675eab4..994f1b8e3504 100644 > --- a/drivers/scsi/sd_zbc.c > +++ b/drivers/scsi/sd_zbc.c > @@ -280,27 +280,28 @@ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, > static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) > { > struct scsi_disk *sdkp; > + unsigned long flags; > unsigned int zno; > int ret; > > sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); > > - spin_lock_bh(&sdkp->zones_wp_offset_lock); > + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); > for (zno = 0; zno < sdkp->nr_zones; zno++) { > if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) > continue; > > - spin_unlock_bh(&sdkp->zones_wp_offset_lock); > + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); > ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf, > SD_BUF_SIZE, > zno * sdkp->zone_blocks, true); > - spin_lock_bh(&sdkp->zones_wp_offset_lock); > + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); > if (!ret) > sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64, > zno, sd_zbc_update_wp_offset_cb, > sdkp); > } > - spin_unlock_bh(&sdkp->zones_wp_offset_lock); > + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); > > scsi_device_put(sdkp->device); > } > @@ -324,6 +325,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, > struct request *rq = cmd->request; > struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); > unsigned int wp_offset, zno = blk_rq_zone_no(rq); > + unsigned long flags; > blk_status_t ret; > > ret = sd_zbc_cmnd_checks(cmd); > @@ -337,7 +339,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, > if (!blk_req_zone_write_trylock(rq)) > return BLK_STS_ZONE_RESOURCE; > > - spin_lock_bh(&sdkp->zones_wp_offset_lock); > + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); > wp_offset = sdkp->zones_wp_offset[zno]; > switch (wp_offset) { > case SD_ZBC_INVALID_WP_OFST: > @@ -366,7 +368,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, > > *lba += wp_offset; > } > - spin_unlock_bh(&sdkp->zones_wp_offset_lock); > + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); > if (ret) > blk_req_zone_write_unlock(rq); > return ret; > @@ -445,6 +447,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, > struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); > unsigned int zno = blk_rq_zone_no(rq); > enum req_opf op = req_op(rq); > + unsigned long flags; > > /* > * If we got an error for a command that needs updating the write > @@ -452,7 +455,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, > * invalid to force an update from disk the next time a zone append > * command is issued. > */ > - spin_lock_bh(&sdkp->zones_wp_offset_lock); > + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); > > if (result && op != REQ_OP_ZONE_RESET_ALL) { > if (op == REQ_OP_ZONE_APPEND) { > @@ -496,7 +499,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, > } > > unlock_wp_offset: > - spin_unlock_bh(&sdkp->zones_wp_offset_lock); > + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); > > return good_bytes; > } > Looks good to me. Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
On Mar 10, 2021 / 18:48, Johannes Thumshirn wrote: > Recent changes changed the completion of SCSI commands from Soft-IRQ > context to IRQ context. This triggers the following warning, when we're > completing writes to zoned block devices that go through the zone append > emulation: > > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > Call Trace: > <IRQ> > _raw_spin_lock_bh+0x18/0x40 > sd_zbc_complete+0x43d/0x1150 > sd_done+0x631/0x1040 > ? mark_lock+0xe4/0x2fd0 > ? provisioning_mode_store+0x3f0/0x3f0 > scsi_finish_command+0x31b/0x5c0 > _scsih_io_done+0x960/0x29e0 [mpt3sas] > ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > ? __lock_acquire+0x166b/0x58b0 > ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > ? lock_is_held_type+0x98/0x110 > ? find_held_lock+0x2c/0x110 > ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > _base_interrupt+0x8d/0xd0 [mpt3sas] > ? rcu_read_lock_sched_held+0x3f/0x70 > __handle_irq_event_percpu+0x24d/0x600 > handle_irq_event+0xef/0x240 > ? handle_irq_event_percpu+0x110/0x110 > handle_edge_irq+0x1f6/0xb60 > __common_interrupt+0x75/0x160 > common_interrupt+0x7b/0xa0 > </IRQ> > asm_common_interrupt+0x1e/0x40 > > Don't use spin_lock_bh() to protect the update of the write pointer offset > cache, but use spin_lock_irqsave() for it. > > Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> > Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> I tested and confirmed that this patch fixes the WARNING. Thanks! Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
On 3/10/21 1:48 AM, Johannes Thumshirn wrote:
> Recent changes [ ... ]
Please add Fixes: and/or Cc: stable tags as appropriate.
Thanks,
Bart.
On 11/03/2021 16:13, Bart Van Assche wrote: > On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >> Recent changes [ ... ] > > Please add Fixes: and/or Cc: stable tags as appropriate. I couldn't pin down the offending commit and I can't reproduce it locally as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2.
On 3/11/21 7:18 AM, Johannes Thumshirn wrote: > On 11/03/2021 16:13, Bart Van Assche wrote: >> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>> Recent changes [ ... ] >> >> Please add Fixes: and/or Cc: stable tags as appropriate. > > I couldn't pin down the offending commit and I can't reproduce it locally > as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. That's weird. Did Shinichiro use a HBA? Could this be the result of a behavior change in the HBA driver? Thanks, Bart.
On 11/03/2021 16:48, Bart Van Assche wrote: > On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >> On 11/03/2021 16:13, Bart Van Assche wrote: >>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>> Recent changes [ ... ] >>> >>> Please add Fixes: and/or Cc: stable tags as appropriate. >> >> I couldn't pin down the offending commit and I can't reproduce it locally >> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. > > That's weird. Did Shinichiro use a HBA? Could this be the result of a > behavior change in the HBA driver? Yes I've looked at the commits in mpt3sas, but can't really pinpoint the offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared host tagset for CPU hotplug") is the only one that /looks/ as if it could be causing it, but I don't know mpt3sas well enough. FWIW added Sreekanth
On 3/11/21 7:54 AM, Johannes Thumshirn wrote: > Yes I've looked at the commits in mpt3sas, but can't really pinpoint the > offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared > host tagset for CPU hotplug") is the only one that /looks/ as if it could > be causing it, but I don't know mpt3sas well enough. > > FWIW added Sreekanth Hi Johannes, I'd like to see a full root-cause analysis before proceeding with this patch because as far as I can see the call chain for scsi_finish_command() is as follows (entirely in soft-IRQ context): blk_done_softirq() blk_complete_reqs() scsi_softirq_done() scsi_finish_command() Thanks, Bart.
On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: > On 11/03/2021 16:48, Bart Van Assche wrote: > > On 3/11/21 7:18 AM, Johannes Thumshirn wrote: > >> On 11/03/2021 16:13, Bart Van Assche wrote: > >>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: > >>>> Recent changes [ ... ] > >>> > >>> Please add Fixes: and/or Cc: stable tags as appropriate. > >> > >> I couldn't pin down the offending commit and I can't reproduce it locally > >> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. > > > > That's weird. Did Shinichiro use a HBA? Could this be the result of a > > behavior change in the HBA driver? > > Yes I've looked at the commits in mpt3sas, but can't really pinpoint the > offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared > host tagset for CPU hotplug") is the only one that /looks/ as if it could > be causing it, but I don't know mpt3sas well enough. > > FWIW added Sreekanth The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA Broadcom 9400. It can be recreated by running blktests block/004 on the drive (after reboot). It is also recreated with SATA SMR drive with the HBA, but not observed with SATA drives connected to AHCI. I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose it indicates that the commit changed HBA driver behavior.
On 12/03/2021 05:38, Shinichiro Kawasaki wrote: > The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA > Broadcom 9400. It can be recreated by running blktests block/004 on the drive > (after reboot). It is also recreated with SATA SMR drive with the HBA, but not > observed with SATA drives connected to AHCI. > > I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose > it indicates that the commit changed HBA driver behavior. Alright then it's: Fixes: 664f0dce2058 ("scsi: mpt3sas: Add support for shared host tagset for CPU hotplug") Martin, can you pick it up for the fixes queue?
On 2021/03/12 13:38, Shinichiro Kawasaki wrote: > On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: >> On 11/03/2021 16:48, Bart Van Assche wrote: >>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >>>> On 11/03/2021 16:13, Bart Van Assche wrote: >>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>>>> Recent changes [ ... ] >>>>> >>>>> Please add Fixes: and/or Cc: stable tags as appropriate. >>>> >>>> I couldn't pin down the offending commit and I can't reproduce it locally >>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. >>> >>> That's weird. Did Shinichiro use a HBA? Could this be the result of a >>> behavior change in the HBA driver? >> >> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the >> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared >> host tagset for CPU hotplug") is the only one that /looks/ as if it could >> be causing it, but I don't know mpt3sas well enough. >> >> FWIW added Sreekanth > > The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA > Broadcom 9400. It can be recreated by running blktests block/004 on the drive > (after reboot). It is also recreated with SATA SMR drive with the HBA, but not > observed with SATA drives connected to AHCI. > > I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose > it indicates that the commit changed HBA driver behavior. Can you send the warning splat with backtrace ?
On 12/03/2021 08:27, Damien Le Moal wrote: > On 2021/03/12 13:38, Shinichiro Kawasaki wrote: >> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: >>> On 11/03/2021 16:48, Bart Van Assche wrote: >>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >>>>> On 11/03/2021 16:13, Bart Van Assche wrote: >>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>>>>> Recent changes [ ... ] >>>>>> >>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. >>>>> >>>>> I couldn't pin down the offending commit and I can't reproduce it locally >>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. >>>> >>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a >>>> behavior change in the HBA driver? >>> >>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the >>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared >>> host tagset for CPU hotplug") is the only one that /looks/ as if it could >>> be causing it, but I don't know mpt3sas well enough. >>> >>> FWIW added Sreekanth >> >> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA >> Broadcom 9400. It can be recreated by running blktests block/004 on the drive >> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not >> observed with SATA drives connected to AHCI. >> >> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose >> it indicates that the commit changed HBA driver behavior. > > Can you send the warning splat with backtrace ? > The warning splat is in the commit message: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 RIP: 0010:__local_bh_disable_ip+0x3f/0x50 RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 Call Trace: <IRQ> _raw_spin_lock_bh+0x18/0x40 sd_zbc_complete+0x43d/0x1150 sd_done+0x631/0x1040 ? mark_lock+0xe4/0x2fd0 ? provisioning_mode_store+0x3f0/0x3f0 scsi_finish_command+0x31b/0x5c0 _scsih_io_done+0x960/0x29e0 [mpt3sas] ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] ? __lock_acquire+0x166b/0x58b0 ? _get_st_from_smid+0x4a/0x80 [mpt3sas] _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] ? lock_is_held_type+0x98/0x110 ? find_held_lock+0x2c/0x110 ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] _base_interrupt+0x8d/0xd0 [mpt3sas] ? rcu_read_lock_sched_held+0x3f/0x70 __handle_irq_event_percpu+0x24d/0x600 handle_irq_event+0xef/0x240 ? handle_irq_event_percpu+0x110/0x110 handle_edge_irq+0x1f6/0xb60 __common_interrupt+0x75/0x160 common_interrupt+0x7b/0xa0 </IRQ> asm_common_interrupt+0x1e/0x40
On 2021/03/12 16:59, Johannes Thumshirn wrote: > On 12/03/2021 08:27, Damien Le Moal wrote: >> On 2021/03/12 13:38, Shinichiro Kawasaki wrote: >>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: >>>> On 11/03/2021 16:48, Bart Van Assche wrote: >>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >>>>>> On 11/03/2021 16:13, Bart Van Assche wrote: >>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>>>>>> Recent changes [ ... ] >>>>>>> >>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. >>>>>> >>>>>> I couldn't pin down the offending commit and I can't reproduce it locally >>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. >>>>> >>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a >>>>> behavior change in the HBA driver? >>>> >>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the >>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared >>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could >>>> be causing it, but I don't know mpt3sas well enough. >>>> >>>> FWIW added Sreekanth >>> >>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA >>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive >>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not >>> observed with SATA drives connected to AHCI. >>> >>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose >>> it indicates that the commit changed HBA driver behavior. >> >> Can you send the warning splat with backtrace ? >> > > The warning splat is in the commit message: > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > Call Trace: > <IRQ> > _raw_spin_lock_bh+0x18/0x40 > sd_zbc_complete+0x43d/0x1150 > sd_done+0x631/0x1040 > ? mark_lock+0xe4/0x2fd0 > ? provisioning_mode_store+0x3f0/0x3f0 > scsi_finish_command+0x31b/0x5c0 > _scsih_io_done+0x960/0x29e0 [mpt3sas] > ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > ? __lock_acquire+0x166b/0x58b0 > ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > ? lock_is_held_type+0x98/0x110 > ? find_held_lock+0x2c/0x110 > ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > _base_interrupt+0x8d/0xd0 [mpt3sas] > ? rcu_read_lock_sched_held+0x3f/0x70 > __handle_irq_event_percpu+0x24d/0x600 > handle_irq_event+0xef/0x240 > ? handle_irq_event_percpu+0x110/0x110 > handle_edge_irq+0x1f6/0xb60 > __common_interrupt+0x75/0x160 > common_interrupt+0x7b/0xa0 > </IRQ> > asm_common_interrupt+0x1e/0x40 > Looking at patch 664f0dce2058, all that seems to be done is to enable nr_hw_queue > 1. I do not see any change of locking context or irq handling. From the backtrace, it does not look like scsi_finish_command() is called from softirq... Probably a change in that area is responsible ?
On 12/03/2021 09:20, Damien Le Moal wrote: > On 2021/03/12 16:59, Johannes Thumshirn wrote: >> On 12/03/2021 08:27, Damien Le Moal wrote: >>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote: >>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: >>>>> On 11/03/2021 16:48, Bart Van Assche wrote: >>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote: >>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>>>>>>> Recent changes [ ... ] >>>>>>>> >>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. >>>>>>> >>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally >>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. >>>>>> >>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a >>>>>> behavior change in the HBA driver? >>>>> >>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the >>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared >>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could >>>>> be causing it, but I don't know mpt3sas well enough. >>>>> >>>>> FWIW added Sreekanth >>>> >>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA >>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive >>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not >>>> observed with SATA drives connected to AHCI. >>>> >>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose >>>> it indicates that the commit changed HBA driver behavior. >>> >>> Can you send the warning splat with backtrace ? >>> >> >> The warning splat is in the commit message: >> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 >> Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 >> RIP: 0010:__local_bh_disable_ip+0x3f/0x50 >> RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 >> RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 >> RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd >> RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 >> R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 >> R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 >> FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 >> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >> CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 >> Call Trace: >> <IRQ> >> _raw_spin_lock_bh+0x18/0x40 >> sd_zbc_complete+0x43d/0x1150 >> sd_done+0x631/0x1040 >> ? mark_lock+0xe4/0x2fd0 >> ? provisioning_mode_store+0x3f0/0x3f0 >> scsi_finish_command+0x31b/0x5c0 >> _scsih_io_done+0x960/0x29e0 [mpt3sas] >> ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] >> ? __lock_acquire+0x166b/0x58b0 >> ? _get_st_from_smid+0x4a/0x80 [mpt3sas] >> _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] >> ? lock_is_held_type+0x98/0x110 >> ? find_held_lock+0x2c/0x110 >> ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] >> _base_interrupt+0x8d/0xd0 [mpt3sas] >> ? rcu_read_lock_sched_held+0x3f/0x70 >> __handle_irq_event_percpu+0x24d/0x600 >> handle_irq_event+0xef/0x240 >> ? handle_irq_event_percpu+0x110/0x110 >> handle_edge_irq+0x1f6/0xb60 >> __common_interrupt+0x75/0x160 >> common_interrupt+0x7b/0xa0 >> </IRQ> >> asm_common_interrupt+0x1e/0x40 >> > > Looking at patch 664f0dce2058, all that seems to be done is to enable > nr_hw_queue > 1. I do not see any change of locking context or irq handling. > From the backtrace, it does not look like scsi_finish_command() is called from > softirq... Probably a change in that area is responsible ? > In scsi_lib.c we only have these two patches in that area: 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq") 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h") and none of them can cause the failure either. In block we have: 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq") but this doesn't look guilty as well, all it does is raising a softirq for all block completions local and remote.
On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote: > On 12/03/2021 09:20, Damien Le Moal wrote: > > On 2021/03/12 16:59, Johannes Thumshirn wrote: > >> On 12/03/2021 08:27, Damien Le Moal wrote: > >>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote: > >>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: > >>>>> On 11/03/2021 16:48, Bart Van Assche wrote: > >>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: > >>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote: > >>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: > >>>>>>>>> Recent changes [ ... ] > >>>>>>>> > >>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. > >>>>>>> > >>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally > >>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. > >>>>>> > >>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a > >>>>>> behavior change in the HBA driver? > >>>>> > >>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the > >>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared > >>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could > >>>>> be causing it, but I don't know mpt3sas well enough. > >>>>> > >>>>> FWIW added Sreekanth > >>>> > >>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA > >>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive > >>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not > >>>> observed with SATA drives connected to AHCI. > >>>> > >>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose > >>>> it indicates that the commit changed HBA driver behavior. > >>> > >>> Can you send the warning splat with backtrace ? > >>> > >> > >> The warning splat is in the commit message: > >> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > >> Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > >> RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > >> RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > >> RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > >> RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > >> RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > >> R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > >> R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > >> FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > >> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > >> CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > >> Call Trace: > >> <IRQ> > >> _raw_spin_lock_bh+0x18/0x40 > >> sd_zbc_complete+0x43d/0x1150 > >> sd_done+0x631/0x1040 > >> ? mark_lock+0xe4/0x2fd0 > >> ? provisioning_mode_store+0x3f0/0x3f0 > >> scsi_finish_command+0x31b/0x5c0 > >> _scsih_io_done+0x960/0x29e0 [mpt3sas] > >> ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > >> ? __lock_acquire+0x166b/0x58b0 > >> ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > >> _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > >> ? lock_is_held_type+0x98/0x110 > >> ? find_held_lock+0x2c/0x110 > >> ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > >> _base_interrupt+0x8d/0xd0 [mpt3sas] > >> ? rcu_read_lock_sched_held+0x3f/0x70 > >> __handle_irq_event_percpu+0x24d/0x600 > >> handle_irq_event+0xef/0x240 > >> ? handle_irq_event_percpu+0x110/0x110 > >> handle_edge_irq+0x1f6/0xb60 > >> __common_interrupt+0x75/0x160 > >> common_interrupt+0x7b/0xa0 > >> </IRQ> > >> asm_common_interrupt+0x1e/0x40 > >> > > > > Looking at patch 664f0dce2058, all that seems to be done is to enable > > nr_hw_queue > 1. I do not see any change of locking context or irq handling. > > From the backtrace, it does not look like scsi_finish_command() is called from > > softirq... Probably a change in that area is responsible ? > > > > > In scsi_lib.c we only have these two patches in that area: > > 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq") > 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h") > > and none of them can cause the failure either. In block we have: > > 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq") > > but this doesn't look guilty as well, all it does is raising a softirq for all > block completions local and remote. In blk_mq_complete_request_remote(), I found the following code. if (rq->q->nr_hw_queues == 1) { blk_mq_raise_softirq(rq); return true; } return false; My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue from zero to a value larger than 1 (with my test system, it is 8), it is propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer called. The call stack I assume is as follows: without calling blk_mq_raise_softirq(), there are all executed in IRQ context, probably. _scsih_io_done() scmd->scsi_done() = scsi_mq_done() blk_mq_complete_request() blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq() rq->q->mq_ops->complete() = scsi_soft_irq_done() scsi_finish_command() drv->done() = sd_done() Will confirm this guess further.
On 12/03/2021 11:05, Shinichiro Kawasaki wrote: > On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote: >> On 12/03/2021 09:20, Damien Le Moal wrote: >>> On 2021/03/12 16:59, Johannes Thumshirn wrote: >>>> On 12/03/2021 08:27, Damien Le Moal wrote: >>>>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote: >>>>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: >>>>>>> On 11/03/2021 16:48, Bart Van Assche wrote: >>>>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: >>>>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote: >>>>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: >>>>>>>>>>> Recent changes [ ... ] >>>>>>>>>> >>>>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. >>>>>>>>> >>>>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally >>>>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. >>>>>>>> >>>>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a >>>>>>>> behavior change in the HBA driver? >>>>>>> >>>>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the >>>>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared >>>>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could >>>>>>> be causing it, but I don't know mpt3sas well enough. >>>>>>> >>>>>>> FWIW added Sreekanth >>>>>> >>>>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA >>>>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive >>>>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not >>>>>> observed with SATA drives connected to AHCI. >>>>>> >>>>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose >>>>>> it indicates that the commit changed HBA driver behavior. >>>>> >>>>> Can you send the warning splat with backtrace ? >>>>> >>>> >>>> The warning splat is in the commit message: >>>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 >>>> Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 >>>> RIP: 0010:__local_bh_disable_ip+0x3f/0x50 >>>> RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 >>>> RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 >>>> RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd >>>> RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 >>>> R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 >>>> R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 >>>> FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 >>>> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 >>>> CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 >>>> Call Trace: >>>> <IRQ> >>>> _raw_spin_lock_bh+0x18/0x40 >>>> sd_zbc_complete+0x43d/0x1150 >>>> sd_done+0x631/0x1040 >>>> ? mark_lock+0xe4/0x2fd0 >>>> ? provisioning_mode_store+0x3f0/0x3f0 >>>> scsi_finish_command+0x31b/0x5c0 >>>> _scsih_io_done+0x960/0x29e0 [mpt3sas] >>>> ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] >>>> ? __lock_acquire+0x166b/0x58b0 >>>> ? _get_st_from_smid+0x4a/0x80 [mpt3sas] >>>> _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] >>>> ? lock_is_held_type+0x98/0x110 >>>> ? find_held_lock+0x2c/0x110 >>>> ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] >>>> _base_interrupt+0x8d/0xd0 [mpt3sas] >>>> ? rcu_read_lock_sched_held+0x3f/0x70 >>>> __handle_irq_event_percpu+0x24d/0x600 >>>> handle_irq_event+0xef/0x240 >>>> ? handle_irq_event_percpu+0x110/0x110 >>>> handle_edge_irq+0x1f6/0xb60 >>>> __common_interrupt+0x75/0x160 >>>> common_interrupt+0x7b/0xa0 >>>> </IRQ> >>>> asm_common_interrupt+0x1e/0x40 >>>> >>> >>> Looking at patch 664f0dce2058, all that seems to be done is to enable >>> nr_hw_queue > 1. I do not see any change of locking context or irq handling. >>> From the backtrace, it does not look like scsi_finish_command() is called from >>> softirq... Probably a change in that area is responsible ? >>> >> >> >> In scsi_lib.c we only have these two patches in that area: >> >> 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq") >> 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h") >> >> and none of them can cause the failure either. In block we have: >> >> 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq") >> >> but this doesn't look guilty as well, all it does is raising a softirq for all >> block completions local and remote. > > In blk_mq_complete_request_remote(), I found the following code. > > if (rq->q->nr_hw_queues == 1) { > blk_mq_raise_softirq(rq); > return true; > } > return false; > > My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue > from zero to a value larger than 1 (with my test system, it is 8), it is > propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer > called. > > The call stack I assume is as follows: without calling blk_mq_raise_softirq(), > there are all executed in IRQ context, probably. > > _scsih_io_done() > scmd->scsi_done() = scsi_mq_done() > blk_mq_complete_request() > blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq() > rq->q->mq_ops->complete() = scsi_soft_irq_done() > scsi_finish_command() > drv->done() = sd_done() > > Will confirm this guess further. > But commit 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq") changed it to: - /* - * For most of single queue controllers, there is only one irq vector - * for handling I/O completion, and the only irq's affinity is set - * to all possible CPUs. On most of ARCHs, this affinity means the irq - * is handled on one specific CPU. - * - * So complete I/O requests in softirq context in case of single queue - * devices to avoid degrading I/O performance due to irqsoff latency. - */ - if (rq->q->nr_hw_queues == 1) - blk_mq_trigger_softirq(rq); - else - rq->q->mq_ops->complete(rq); + blk_mq_trigger_softirq(rq); } So to my understanding, we will always complete in a softirq.
On Mar 12, 2021 / 10:28, Johannes Thumshirn wrote: > On 12/03/2021 11:05, Shinichiro Kawasaki wrote: > > On Mar 12, 2021 / 08:58, Johannes Thumshirn wrote: > >> On 12/03/2021 09:20, Damien Le Moal wrote: > >>> On 2021/03/12 16:59, Johannes Thumshirn wrote: > >>>> On 12/03/2021 08:27, Damien Le Moal wrote: > >>>>> On 2021/03/12 13:38, Shinichiro Kawasaki wrote: > >>>>>> On Mar 11, 2021 / 15:54, Johannes Thumshirn wrote: > >>>>>>> On 11/03/2021 16:48, Bart Van Assche wrote: > >>>>>>>> On 3/11/21 7:18 AM, Johannes Thumshirn wrote: > >>>>>>>>> On 11/03/2021 16:13, Bart Van Assche wrote: > >>>>>>>>>> On 3/10/21 1:48 AM, Johannes Thumshirn wrote: > >>>>>>>>>>> Recent changes [ ... ] > >>>>>>>>>> > >>>>>>>>>> Please add Fixes: and/or Cc: stable tags as appropriate. > >>>>>>>>> > >>>>>>>>> I couldn't pin down the offending commit and I can't reproduce it locally > >>>>>>>>> as well, so I opted out of this. But it must be something between v5.11 and v5.12-rc2. > >>>>>>>> > >>>>>>>> That's weird. Did Shinichiro use a HBA? Could this be the result of a > >>>>>>>> behavior change in the HBA driver? > >>>>>>> > >>>>>>> Yes I've looked at the commits in mpt3sas, but can't really pinpoint the > >>>>>>> offending commit TBH. 664f0dce2058 ("scsi: mpt3sas: Add support for shared > >>>>>>> host tagset for CPU hotplug") is the only one that /looks/ as if it could > >>>>>>> be causing it, but I don't know mpt3sas well enough. > >>>>>>> > >>>>>>> FWIW added Sreekanth > >>>>>> > >>>>>> The WARNING was found in kernel v5.12-rc2 test with a SAS SMR drive and HBA > >>>>>> Broadcom 9400. It can be recreated by running blktests block/004 on the drive > >>>>>> (after reboot). It is also recreated with SATA SMR drive with the HBA, but not > >>>>>> observed with SATA drives connected to AHCI. > >>>>>> > >>>>>> I reverted the commit 664f0dce2058, then the WARNING disappeared. I suppose > >>>>>> it indicates that the commit changed HBA driver behavior. > >>>>> > >>>>> Can you send the warning splat with backtrace ? > >>>>> > >>>> > >>>> The warning splat is in the commit message: > >>>> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > >>>> Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > >>>> RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > >>>> RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > >>>> RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > >>>> RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > >>>> RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > >>>> R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > >>>> R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > >>>> FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > >>>> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > >>>> CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > >>>> Call Trace: > >>>> <IRQ> > >>>> _raw_spin_lock_bh+0x18/0x40 > >>>> sd_zbc_complete+0x43d/0x1150 > >>>> sd_done+0x631/0x1040 > >>>> ? mark_lock+0xe4/0x2fd0 > >>>> ? provisioning_mode_store+0x3f0/0x3f0 > >>>> scsi_finish_command+0x31b/0x5c0 > >>>> _scsih_io_done+0x960/0x29e0 [mpt3sas] > >>>> ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > >>>> ? __lock_acquire+0x166b/0x58b0 > >>>> ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > >>>> _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > >>>> ? lock_is_held_type+0x98/0x110 > >>>> ? find_held_lock+0x2c/0x110 > >>>> ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > >>>> _base_interrupt+0x8d/0xd0 [mpt3sas] > >>>> ? rcu_read_lock_sched_held+0x3f/0x70 > >>>> __handle_irq_event_percpu+0x24d/0x600 > >>>> handle_irq_event+0xef/0x240 > >>>> ? handle_irq_event_percpu+0x110/0x110 > >>>> handle_edge_irq+0x1f6/0xb60 > >>>> __common_interrupt+0x75/0x160 > >>>> common_interrupt+0x7b/0xa0 > >>>> </IRQ> > >>>> asm_common_interrupt+0x1e/0x40 > >>>> > >>> > >>> Looking at patch 664f0dce2058, all that seems to be done is to enable > >>> nr_hw_queue > 1. I do not see any change of locking context or irq handling. > >>> From the backtrace, it does not look like scsi_finish_command() is called from > >>> softirq... Probably a change in that area is responsible ? > >>> > >> > >> > >> In scsi_lib.c we only have these two patches in that area: > >> > >> 684da7628d93 ("block: remove unnecessary argument from blk_execute_rq") > >> 962c8dcdd5fa ("scsi: core: Add a new error code DID_TRANSPORT_MARGINAL in scsi.h") > >> > >> and none of them can cause the failure either. In block we have: > >> > >> 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests in softirq") > >> > >> but this doesn't look guilty as well, all it does is raising a softirq for all > >> block completions local and remote. > > > > In blk_mq_complete_request_remote(), I found the following code. > > > > if (rq->q->nr_hw_queues == 1) { > > blk_mq_raise_softirq(rq); > > return true; > > } > > return false; > > > > My mere guess is that the commit 664f0dce2058 changed the shost->nr_hw_queue > > from zero to a value larger than 1 (with my test system, it is 8), it is > > propagated to rq->q->nr_hw_queues, then blk_mq_raise_softirq() is no longer > > called. > > > > The call stack I assume is as follows: without calling blk_mq_raise_softirq(), > > there are all executed in IRQ context, probably. > > > > _scsih_io_done() > > scmd->scsi_done() = scsi_mq_done() > > blk_mq_complete_request() > > blk_mq_complete_request_remote() ... did not call blk_mq_raise_softirq() > > rq->q->mq_ops->complete() = scsi_soft_irq_done() > > scsi_finish_command() > > drv->done() = sd_done() > > > > Will confirm this guess further. > > > > But commit 0a2efafbb1c7 ("blk-mq: Always complete remote completions requests > in softirq") changed it to: > > > - /* > - * For most of single queue controllers, there is only one irq vector > - * for handling I/O completion, and the only irq's affinity is set > - * to all possible CPUs. On most of ARCHs, this affinity means the irq > - * is handled on one specific CPU. > - * > - * So complete I/O requests in softirq context in case of single queue > - * devices to avoid degrading I/O performance due to irqsoff latency. > - */ > - if (rq->q->nr_hw_queues == 1) > - blk_mq_trigger_softirq(rq); > - else > - rq->q->mq_ops->complete(rq); > + blk_mq_trigger_softirq(rq); > } > > So to my understanding, we will always complete in a softirq. > My understanding is the change above in __blk_mq_complete_request_remote() is used for IPI (it is triggered in blk_mq_complete_send_ipi()). Let me quote blk_mq_complete_request_remote() below (similar name but without underscores). If blk_mq_complete_need_ipi(rq) returns false, blk_mq_complete_send_ipi(rq) is not called. In this case, the commit 0a2efafbb1c7 does not affect. bool blk_mq_complete_request_remote(struct request *rq) { WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); /* * For a polled request, always complete locallly, it's pointless * to redirect the completion. */ if (rq->cmd_flags & REQ_HIPRI) return false; if (blk_mq_complete_need_ipi(rq)) { blk_mq_complete_send_ipi(rq); return true; } if (rq->q->nr_hw_queues == 1) { blk_mq_raise_softirq(rq); return true; } return false; } With my test environment and some debug prints, I confirmed these two: - The commit 664f0dce2058 changed q->nr_hw_queues value of drives on HBA from 1 to 8. - The commit 664f0dce2058 changed the blk_mq_complete_request_remote() return value from true to false. This indicates that blk_mq_complete_need_ipi(rq) returns false. So now I believe the commit 664f0dce2058 changed scsi_finish_command() context from soft-IRQ to IRQ.
On 12/03/2021 12:48, Shinichiro Kawasaki wrote: > So now I believe the commit 664f0dce2058 changed scsi_finish_command() > context from soft-IRQ to IRQ. That would fit you examination that a revert of 664f0dce2058 fixes the WARN.
Martin ping?
Johannes,
> Martin ping?
When I originally read the thread I was left with the impression that we
were just papering over an underlying blk-mq issue. However, I read the
whole thing again and feel this fix is acceptable.
On 17/03/2021 02:42, Martin K. Petersen wrote: > > Johannes, > >> Martin ping? > > When I originally read the thread I was left with the impression that we > were just papering over an underlying blk-mq issue. However, I read the > whole thing again and feel this fix is acceptable. > Should I resend the patch with the Fixes line applied, or can you take it as is?
Johannes, > Should I resend the patch with the Fixes line applied, or can you take > it as is? It's already in scsi-fixes.
On Wed, 10 Mar 2021 18:48:06 +0900, Johannes Thumshirn wrote: > Recent changes changed the completion of SCSI commands from Soft-IRQ > context to IRQ context. This triggers the following warning, when we're > completing writes to zoned block devices that go through the zone append > emulation: > > CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 > Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 > RIP: 0010:__local_bh_disable_ip+0x3f/0x50 > RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 > RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 > RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd > RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 > R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 > R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 > FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 > Call Trace: > <IRQ> > _raw_spin_lock_bh+0x18/0x40 > sd_zbc_complete+0x43d/0x1150 > sd_done+0x631/0x1040 > ? mark_lock+0xe4/0x2fd0 > ? provisioning_mode_store+0x3f0/0x3f0 > scsi_finish_command+0x31b/0x5c0 > _scsih_io_done+0x960/0x29e0 [mpt3sas] > ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] > ? __lock_acquire+0x166b/0x58b0 > ? _get_st_from_smid+0x4a/0x80 [mpt3sas] > _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] > ? lock_is_held_type+0x98/0x110 > ? find_held_lock+0x2c/0x110 > ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] > _base_interrupt+0x8d/0xd0 [mpt3sas] > ? rcu_read_lock_sched_held+0x3f/0x70 > __handle_irq_event_percpu+0x24d/0x600 > handle_irq_event+0xef/0x240 > ? handle_irq_event_percpu+0x110/0x110 > handle_edge_irq+0x1f6/0xb60 > __common_interrupt+0x75/0x160 > common_interrupt+0x7b/0xa0 > </IRQ> > asm_common_interrupt+0x1e/0x40 > > [...] Applied to 5.12/scsi-fixes, thanks! [1/1] scsi: sd_zbc: update write pointer offset cache https://git.kernel.org/mkp/scsi/c/2db4215f4755
On 19/03/2021 04:46, Martin K. Petersen wrote: > > Applied to 5.12/scsi-fixes, thanks! > > [1/1] scsi: sd_zbc: update write pointer offset cache > https://git.kernel.org/mkp/scsi/c/2db4215f4755 > Thanks!
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index ee558675eab4..994f1b8e3504 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -280,27 +280,28 @@ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx, static void sd_zbc_update_wp_offset_workfn(struct work_struct *work) { struct scsi_disk *sdkp; + unsigned long flags; unsigned int zno; int ret; sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work); - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); for (zno = 0; zno < sdkp->nr_zones; zno++) { if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST) continue; - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf, SD_BUF_SIZE, zno * sdkp->zone_blocks, true); - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); if (!ret) sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64, zno, sd_zbc_update_wp_offset_cb, sdkp); } - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); scsi_device_put(sdkp->device); } @@ -324,6 +325,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, struct request *rq = cmd->request; struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); unsigned int wp_offset, zno = blk_rq_zone_no(rq); + unsigned long flags; blk_status_t ret; ret = sd_zbc_cmnd_checks(cmd); @@ -337,7 +339,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, if (!blk_req_zone_write_trylock(rq)) return BLK_STS_ZONE_RESOURCE; - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); wp_offset = sdkp->zones_wp_offset[zno]; switch (wp_offset) { case SD_ZBC_INVALID_WP_OFST: @@ -366,7 +368,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, *lba += wp_offset; } - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); if (ret) blk_req_zone_write_unlock(rq); return ret; @@ -445,6 +447,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); unsigned int zno = blk_rq_zone_no(rq); enum req_opf op = req_op(rq); + unsigned long flags; /* * If we got an error for a command that needs updating the write @@ -452,7 +455,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, * invalid to force an update from disk the next time a zone append * command is issued. */ - spin_lock_bh(&sdkp->zones_wp_offset_lock); + spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags); if (result && op != REQ_OP_ZONE_RESET_ALL) { if (op == REQ_OP_ZONE_APPEND) { @@ -496,7 +499,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, } unlock_wp_offset: - spin_unlock_bh(&sdkp->zones_wp_offset_lock); + spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags); return good_bytes; }
Recent changes changed the completion of SCSI commands from Soft-IRQ context to IRQ context. This triggers the following warning, when we're completing writes to zoned block devices that go through the zone append emulation: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc2+ #2 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 RIP: 0010:__local_bh_disable_ip+0x3f/0x50 RSP: 0018:ffff8883e1409ba8 EFLAGS: 00010006 RAX: 0000000080010001 RBX: 0000000000000001 RCX: 0000000000000013 RDX: ffff888129e4d200 RSI: 0000000000000201 RDI: ffffffff915b9dbd RBP: ffff888113e9a540 R08: ffff888113e9a540 R09: 00000000000077f0 R10: 0000000000080000 R11: 0000000000000001 R12: ffff888129e4d200 R13: 0000000000001000 R14: 00000000000077f0 R15: ffff888129e4d218 FS: 0000000000000000(0000) GS:ffff8883e1400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2f8418ebc0 CR3: 000000021202a006 CR4: 00000000001706f0 Call Trace: <IRQ> _raw_spin_lock_bh+0x18/0x40 sd_zbc_complete+0x43d/0x1150 sd_done+0x631/0x1040 ? mark_lock+0xe4/0x2fd0 ? provisioning_mode_store+0x3f0/0x3f0 scsi_finish_command+0x31b/0x5c0 _scsih_io_done+0x960/0x29e0 [mpt3sas] ? mpt3sas_scsih_scsi_lookup_get+0x1c7/0x340 [mpt3sas] ? __lock_acquire+0x166b/0x58b0 ? _get_st_from_smid+0x4a/0x80 [mpt3sas] _base_process_reply_queue+0x23f/0x26e0 [mpt3sas] ? lock_is_held_type+0x98/0x110 ? find_held_lock+0x2c/0x110 ? mpt3sas_base_sync_reply_irqs+0x360/0x360 [mpt3sas] _base_interrupt+0x8d/0xd0 [mpt3sas] ? rcu_read_lock_sched_held+0x3f/0x70 __handle_irq_event_percpu+0x24d/0x600 handle_irq_event+0xef/0x240 ? handle_irq_event_percpu+0x110/0x110 handle_edge_irq+0x1f6/0xb60 __common_interrupt+0x75/0x160 common_interrupt+0x7b/0xa0 </IRQ> asm_common_interrupt+0x1e/0x40 Don't use spin_lock_bh() to protect the update of the write pointer offset cache, but use spin_lock_irqsave() for it. Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> --- drivers/scsi/sd_zbc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-)