@@ -4983,7 +4983,8 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
spin_unlock_irqrestore(&sqp->qc_lock, iflags);
if (unlikely(aborted)) {
if (sdebug_verbose)
- pr_info("bypassing scsi_done() due to aborted cmd\n");
+ pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
+ blk_abort_request(scsi_cmd_to_rq(scp));
return;
}
scsi_done(scp); /* callback to mid level */
@@ -5712,8 +5713,13 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
sd_dp->issuing_cpu = raw_smp_processor_id();
} else { /* jdelay < 0, use work queue */
if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
- atomic_read(&sdeb_inject_pending)))
+ atomic_read(&sdeb_inject_pending))) {
sd_dp->aborted = true;
+ atomic_set(&sdeb_inject_pending, 0);
+ sdev_printk(KERN_INFO, sdp, "abort request tag=%#x\n",
+ blk_mq_unique_tag_to_tag(get_tag(cmnd)));
+ }
+
if (polled) {
sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot);
spin_lock_irqsave(&sqp->qc_lock, iflags);
@@ -5738,13 +5744,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
}
if (sdebug_statistics)
sd_dp->issuing_cpu = raw_smp_processor_id();
- if (unlikely(sd_dp->aborted)) {
- sdev_printk(KERN_INFO, sdp, "abort request tag %d\n",
- scsi_cmd_to_rq(cmnd)->tag);
- blk_abort_request(scsi_cmd_to_rq(cmnd));
- atomic_set(&sdeb_inject_pending, 0);
- sd_dp->aborted = false;
- }
}
return 0;
The command abort feature allows us to test aborting a command which has timed-out. The idea is that for specific commands we just don't call scsi_done() and allow the request to timeout, which ensures SCSI EH kicks-in we try to abort the command. Since commit 4a0c6f432d15 ("scsi: scsi_debug: Add new defer type for mq_poll") this does not seem to work. The issue is that we clear the sd_dp->aborted flag in schedule_resp() before the completion callback has run. When the completion callback actually runs, it calls scsi_done() as normal as sd_dp->aborted unset. This is all very racy. Fix by not clearing sd_dp->aborted in schedule_resp(). Also move the call to blk_abort_request() from schedule_resp() to sdebug_q_cmd_complete(), which makes the code have a more logical sequence. I also note that this feature only works for commands which are classed as "SDEG_RES_IMMED_MASK", but only practically triggered with prior RW commands. So for my experiment I need to run fio to trigger the error on the "nth" command (see inject_on_this_cmd()), and then run something like sg_sync to queue a command to actually trigger the abort. Signed-off-by: John Garry <john.g.garry@oracle.com> --- drivers/scsi/scsi_debug.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-)