Message ID | 1435676153-27822-1-git-send-email-sagig@mellanox.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 6/30/15, 9:55 AM, Sagi Grimberg wrote: > From: Ariel Nahum <arieln@mellanox.com> > > Connection last_ping is not being updated when iscsi_send_nopout fails. > Not updating the last_ping will cause firing a timer to a past time > (last_ping + ping_tmo < current_time) which triggers an infinite loop of > iscsi_check_transport_timeouts() and hogs the cpu. > > Fix this issue by checking the return value of iscsi_send_nopout. > If it fails set the next_timeout to one second later. > > Signed-off-by: Ariel Nahum <arieln@mellanox.com> > Signed-off-by: Sagi Grimberg <sagig@mellanox.com> > --- > drivers/scsi/libiscsi.c | 15 ++++++++++----- > 1 files changed, 10 insertions(+), 5 deletions(-) > > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c > index 8053f24..1ea4213 100644 > --- a/drivers/scsi/libiscsi.c > +++ b/drivers/scsi/libiscsi.c > @@ -979,13 +979,13 @@ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) > wake_up(&conn->ehwait); > } > > -static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) > +static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) > { > struct iscsi_nopout hdr; > struct iscsi_task *task; > > if (!rhdr && conn->ping_task) > - return; > + return -EINVAL; > > memset(&hdr, 0, sizeof(struct iscsi_nopout)); > hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; > @@ -999,13 +999,16 @@ static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) > hdr.ttt = RESERVED_ITT; > > task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); > - if (!task) > + if (!task) { Are you hitting the failure case in the first chunk of the patch or the failure right above? If the latter, why is it failing? > iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); > + return -EIO; > + } > else if (!rhdr) { I think the coding style is wrong. It should be: if () { } else if { } > /* only track our nops */ > conn->ping_task = task; > conn->last_ping = jiffies; > } > + return 0; > } > > static int iscsi_nop_out_rsp(struct iscsi_task *task, > @@ -2095,8 +2098,10 @@ static void iscsi_check_transport_timeouts(unsigned long data) > if (time_before_eq(last_recv + recv_timeout, jiffies)) { > /* send a ping to try to provoke some traffic */ > ISCSI_DBG_CONN(conn, "Sending nopout as ping\n"); > - iscsi_send_nopout(conn, NULL); > - next_timeout = conn->last_ping + (conn->ping_timeout * HZ); > + if (iscsi_send_nopout(conn, NULL)) > + next_timeout = jiffies + (1 * HZ); > + else > + next_timeout = conn->last_ping + (conn->ping_timeout * HZ); > } else > next_timeout = last_recv + recv_timeout; > > n -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 7/2/2015 11:11 PM, Mike Christie wrote: > On 6/30/15, 9:55 AM, Sagi Grimberg wrote: >> From: Ariel Nahum <arieln@mellanox.com> >> >> Connection last_ping is not being updated when iscsi_send_nopout fails. >> Not updating the last_ping will cause firing a timer to a past time >> (last_ping + ping_tmo < current_time) which triggers an infinite loop of >> iscsi_check_transport_timeouts() and hogs the cpu. >> >> Fix this issue by checking the return value of iscsi_send_nopout. >> If it fails set the next_timeout to one second later. >> >> Signed-off-by: Ariel Nahum <arieln@mellanox.com> >> Signed-off-by: Sagi Grimberg <sagig@mellanox.com> >> --- >> drivers/scsi/libiscsi.c | 15 ++++++++++----- >> 1 files changed, 10 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c >> index 8053f24..1ea4213 100644 >> --- a/drivers/scsi/libiscsi.c >> +++ b/drivers/scsi/libiscsi.c >> @@ -979,13 +979,13 @@ static void iscsi_tmf_rsp(struct iscsi_conn >> *conn, struct iscsi_hdr *hdr) >> wake_up(&conn->ehwait); >> } >> >> -static void iscsi_send_nopout(struct iscsi_conn *conn, struct >> iscsi_nopin *rhdr) >> +static int iscsi_send_nopout(struct iscsi_conn *conn, struct >> iscsi_nopin *rhdr) >> { >> struct iscsi_nopout hdr; >> struct iscsi_task *task; >> >> if (!rhdr && conn->ping_task) >> - return; >> + return -EINVAL; >> >> memset(&hdr, 0, sizeof(struct iscsi_nopout)); >> hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; >> @@ -999,13 +999,16 @@ static void iscsi_send_nopout(struct iscsi_conn >> *conn, struct iscsi_nopin *rhdr) >> hdr.ttt = RESERVED_ITT; >> >> task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, >> NULL, 0); >> - if (!task) >> + if (!task) { > > > Are you hitting the failure case in the first chunk of the patch or the > failure right above? If the latter, why is it failing? __iscsi_conn_send_pdu() might fail in various places but Iin our case session->tt->xmit_task() might fail in a couple of scenarios: 1. Hot device removal: In this case the iser transport receives an async event of a device removal and alerts the iscsi layer of a connection failure, but it may still race with an async timer driven nopout sends. 2. Device catastrophic error recovery: In this scenario, the RDMA device enters a catastrophic error condition and initiates a recovery sequence. At this period, the HW interfaces (packet send) are blocked and immediately failed. This condition propagates to iscsi layer which should take into account that xmit_task() might fail. > >> iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); >> + return -EIO; >> + } >> else if (!rhdr) { > > I think the coding style is wrong. It should be: > > if () { > > } else if { > > } Yep, we'll fix that in v1. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 8053f24..1ea4213 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -979,13 +979,13 @@ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) wake_up(&conn->ehwait); } -static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) +static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) { struct iscsi_nopout hdr; struct iscsi_task *task; if (!rhdr && conn->ping_task) - return; + return -EINVAL; memset(&hdr, 0, sizeof(struct iscsi_nopout)); hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; @@ -999,13 +999,16 @@ static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) hdr.ttt = RESERVED_ITT; task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); - if (!task) + if (!task) { iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); + return -EIO; + } else if (!rhdr) { /* only track our nops */ conn->ping_task = task; conn->last_ping = jiffies; } + return 0; } static int iscsi_nop_out_rsp(struct iscsi_task *task, @@ -2095,8 +2098,10 @@ static void iscsi_check_transport_timeouts(unsigned long data) if (time_before_eq(last_recv + recv_timeout, jiffies)) { /* send a ping to try to provoke some traffic */ ISCSI_DBG_CONN(conn, "Sending nopout as ping\n"); - iscsi_send_nopout(conn, NULL); - next_timeout = conn->last_ping + (conn->ping_timeout * HZ); + if (iscsi_send_nopout(conn, NULL)) + next_timeout = jiffies + (1 * HZ); + else + next_timeout = conn->last_ping + (conn->ping_timeout * HZ); } else next_timeout = last_recv + recv_timeout;