diff mbox

iscsi-target: Fix iscsit_start_kthreads failure OOPs

Message ID 1436259023-1561-1-git-send-email-nab@daterainc.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nicholas A. Bellinger July 7, 2015, 8:50 a.m. UTC
From: Nicholas Bellinger <nab@linux-iscsi.org>

This patch fixes a regression introduced with the following commit
in v4.0-rc1 code:

    commit 88dcd2dab5c23b1c9cfc396246d8f476c872f0ca
    Author: Nicholas Bellinger <nab@linux-iscsi.org>
    Date:   Thu Feb 26 22:19:15 2015 -0800

        iscsi-target: Convert iscsi_thread_set usage to kthread.h

where iscsit_start_kthreads() failure would result in a NULL pointer
dereference OOPs.

To address this bug, it adds a iscsit_kthread_err() helper to cleanup
both zero_tsih and !zero_tsih cases, and a iscsi_target_tx_thread()
special case to avoid iscsit_take_action_for_connection_exit() during
the late iscsit_start_kthreads() -> kthread_run() failure case for
iscsi_target_rx_thread().

Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: <stable@vger.kernel.org> # v3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c       |  3 ++-
 drivers/target/iscsi/iscsi_target_login.c | 39 +++++++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 6 deletions(-)

Comments

Nicholas A. Bellinger July 7, 2015, 9:01 a.m. UTC | #1
Hey Sagi,

This addresses a regression with traditional iscsi-target that I noticed
recently, but has not been tested with iser-target yet.

Would you mind taking a quick spin with iser-target to verify, and try
to intentionally fail iscsit_start_kthreads() into the two out_bitmap +
out_tx error paths..?

Thanks,

--nab

On Tue, 2015-07-07 at 08:50 +0000, Nicholas A. Bellinger wrote:
> From: Nicholas Bellinger <nab@linux-iscsi.org>
> 
> This patch fixes a regression introduced with the following commit
> in v4.0-rc1 code:
> 
>     commit 88dcd2dab5c23b1c9cfc396246d8f476c872f0ca
>     Author: Nicholas Bellinger <nab@linux-iscsi.org>
>     Date:   Thu Feb 26 22:19:15 2015 -0800
> 
>         iscsi-target: Convert iscsi_thread_set usage to kthread.h
> 
> where iscsit_start_kthreads() failure would result in a NULL pointer
> dereference OOPs.
> 
> To address this bug, it adds a iscsit_kthread_err() helper to cleanup
> both zero_tsih and !zero_tsih cases, and a iscsi_target_tx_thread()
> special case to avoid iscsit_take_action_for_connection_exit() during
> the late iscsit_start_kthreads() -> kthread_run() failure case for
> iscsi_target_rx_thread().
> 
> Cc: Sagi Grimberg <sagig@mellanox.com>
> Cc: <stable@vger.kernel.org> # v3.10+
> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
> ---
>  drivers/target/iscsi/iscsi_target.c       |  3 ++-
>  drivers/target/iscsi/iscsi_target_login.c | 39 +++++++++++++++++++++++++++----
>  2 files changed, 36 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
> index 4e68b62..66655b7 100644
> --- a/drivers/target/iscsi/iscsi_target.c
> +++ b/drivers/target/iscsi/iscsi_target.c
> @@ -3998,7 +3998,8 @@ get_immediate:
>  	}
>  
>  transport_err:
> -	iscsit_take_action_for_connection_exit(conn);
> +	if (conn->rx_thread_active)
> +		iscsit_take_action_for_connection_exit(conn);
>  out:
>  	return 0;
>  }
> diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
> index 3d0fe4f..76dedb6 100644
> --- a/drivers/target/iscsi/iscsi_target_login.c
> +++ b/drivers/target/iscsi/iscsi_target_login.c
> @@ -679,6 +679,7 @@ static int iscsit_start_kthreads(struct iscsi_conn *conn)
>  
>  	return 0;
>  out_tx:
> +	send_sig(SIGINT, conn->tx_thread, 1);
>  	kthread_stop(conn->tx_thread);
>  	conn->tx_thread_active = false;
>  out_bitmap:
> @@ -689,6 +690,23 @@ out_bitmap:
>  	return ret;
>  }
>  
> +static void iscsit_kthread_err(struct iscsi_session *sess,
> +			       struct iscsi_conn *conn, bool new_sess)
> +{
> +	spin_lock_bh(&sess->conn_lock);
> +	list_del(&conn->conn_list);
> +	if (atomic_dec_and_test(&sess->nconn))
> +		sess->session_state = TARG_SESS_STATE_FAILED;
> +	spin_unlock_bh(&sess->conn_lock);
> +
> +	if (!new_sess)
> +		return;
> +
> +	transport_deregister_session_configfs(sess->se_sess);
> +	transport_deregister_session(sess->se_sess);
> +	sess->se_sess = NULL;
> +}
> +
>  int iscsi_post_login_handler(
>  	struct iscsi_np *np,
>  	struct iscsi_conn *conn,
> @@ -740,8 +758,10 @@ int iscsi_post_login_handler(
>  		spin_unlock_bh(&sess->conn_lock);
>  
>  		rc = iscsit_start_kthreads(conn);
> -		if (rc)
> -			return rc;
> +		if (rc) {
> +			iscsit_kthread_err(sess, conn, false);
> +			goto out_old_sess;
> +		}
>  
>  		iscsi_post_login_start_timers(conn);
>  		/*
> @@ -751,7 +771,7 @@ int iscsi_post_login_handler(
>  		iscsit_thread_get_cpumask(conn);
>  		conn->conn_rx_reset_cpumask = 1;
>  		conn->conn_tx_reset_cpumask = 1;
> -
> +out_old_sess:
>  		iscsit_dec_conn_usage_count(conn);
>  		if (stop_timer) {
>  			spin_lock_bh(&se_tpg->session_lock);
> @@ -759,7 +779,7 @@ int iscsi_post_login_handler(
>  			spin_unlock_bh(&se_tpg->session_lock);
>  		}
>  		iscsit_dec_session_usage_count(sess);
> -		return 0;
> +		return rc;
>  	}
>  
>  	iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1);
> @@ -801,8 +821,17 @@ int iscsi_post_login_handler(
>  	spin_unlock_bh(&se_tpg->session_lock);
>  
>  	rc = iscsit_start_kthreads(conn);
> -	if (rc)
> +	if (rc) {
> +		spin_lock_bh(&se_tpg->session_lock);
> +		if (tpg->tpg_tiqn)
> +			tpg->tpg_tiqn->tiqn_nsessions--;
> +		tpg->nsessions--;
> +		spin_unlock_bh(&se_tpg->session_lock);
> +
> +		iscsit_kthread_err(sess, conn, true);
> +		iscsit_dec_conn_usage_count(conn);
>  		return rc;
> +	}
>  
>  	iscsi_post_login_start_timers(conn);
>  	/*


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sagi Grimberg July 7, 2015, 12:04 p.m. UTC | #2
On 7/7/2015 12:01 PM, Nicholas A. Bellinger wrote:
> Hey Sagi,
>
> This addresses a regression with traditional iscsi-target that I noticed
> recently, but has not been tested with iser-target yet.
>
> Would you mind taking a quick spin with iser-target to verify, and try
> to intentionally fail iscsit_start_kthreads() into the two out_bitmap +
> out_tx error paths..?

Umm, I hit a BUG() statement while start to test this.

The easy reproducer is:
- create 20 iser targets (listen on any 0.0.0.0:3260)
- discover targets 2 initiator ports ports (40 target instances)
- run: for i in `seq 100`; do iscsiadm -m node -l && iscsiadm -m node 
-u; done

Note: I wasn't able to reproduce this with iscsi/tcp.

Stack dump:
kernel: isert: isert_wait4logout: conn ffff8803db5a9800
kernel: ------------[ cut here ]------------
kernel: isert: isert_wait4logout: conn ffff8803db5a9800 wait for 
conn_logout_comp
kernel: kernel BUG at drivers/target/iscsi/iscsi_target.c:4465!
kernel: invalid opcode: 0000 [#1] SMP
kernel: Modules linked in: ib_umad ib_ipoib mlx5_ib mlx5_core 
target_core_user uio tcm_loop vhost_scsi tcm_qla2xxx ib_srpt ib_isert 
iscsi_target_mod tcm_fc target_core_file target_core_iblock 
target_core_pscsi target_core_mod vhost rdma_cm ib_cm iw_cm ib_sa ib_mad 
ib_core ib_addr libfc netconsole configfs nfsd exportfs nfsv3 nfs_acl 
rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace autofs4 sunrpc 
cpufreq_ondemand ipv6 ext4 jbd2 dm_mirror dm_region_hash dm_log
kernel: isert: isert_wait_conn: Starting conn ffff8803df09c800
kernel: uinput
kernel: isert: isert_wait4cmds: iscsi_conn ffff8803db171000
kernel: iTCO_wdt
kernel: isert: isert_wait4flush: conn ffff8803df09c800
kernel: iTCO_vendor_support microcode serio_raw pcspkr
kernel: isert: isert_release_work: Starting release conn ffff8803db5a9800
kernel: sb_edac
kernel: isert: isert_release_work: Destroying conn ffff8803db5a9800
kernel: edac_core sg ipmi_si ipmi_msghandler acpi_cpufreq i2c_i801 
lpc_ich mfd_core shpchp ioatdma dm_mod igb dca i2c_algo_bit i2c_core ptp 
pps_core wmi ext3(E) jbd(E) mbcache(E) sd_mod(E) ahci(E) libahci(E) 
isci(E) libsas(E)
kernel: isert: isert_release_kref: conn ffff8803db5a9800 final kref 
iscsi_trx/30457
kernel: scsi_transport_sas(E)
kernel: isert: isert_conn_free_fastreg_pool: Freeing conn 
ffff8803db5a9800 fastreg pool
kernel: qla2xxx(E) scsi_transport_fc(E) [last unloaded: uio]
kernel: CPU: 14 PID: 30286 Comm: kworker/u52:6 Tainted: G            E 
  4.1.0-rc1+ #44
kernel: Hardware name: Supermicro SYS-1027R-WRF/X9DRW, BIOS 3.0a 08/08/2013
kernel: Workqueue: isert_comp_wq isert_do_control_comp [ib_isert]
kernel: task: ffff88046f618e50 ti: ffff8803e12f4000 task.ti: 
ffff8803e12f4000
kernel: RIP: 0010:[<ffffffffa05b36d8>]  [<ffffffffa05b36d8>] 
iscsit_close_session+0x1c8/0x230 [iscsi_target_mod]
kernel: RSP: 0018:ffff8803e12f7d38  EFLAGS: 00010296
kernel: RAX: 0000000000000053 RBX: ffff8803db173c00 RCX: 000000000000209c
kernel: RDX: 0000000000000001 RSI: 0000000000000282 RDI: ffffffff81a3b8f0
kernel: RBP: ffff8803e12f7d58 R08: 0000000000000000 R09: ffffffff81d1facb
kernel: R10: 000000000003fa34 R11: 0000000000000053 R12: ffff8803db171000
kernel: R13: ffff88046b99c000 R14: 0000000000000000 R15: ffff88046fad8e05
kernel: FS:  0000000000000000(0000) GS:ffff88047fd00000(0000) 
knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 0000003c8f66e940 CR3: 0000000001a0e000 CR4: 00000000000406e0
kernel: Stack:
kernel: ffff8803e12f7d88 ffff8803db173c00 ffff8803db171000 0000000000000000
kernel: ffff8803e12f7d68 ffffffffa05bdd8d ffff8803e12f7d78 ffffffffa04c014c
kernel: ffff8803e12f7d88 ffffffffa04c0a09 ffff8803e12f7db8 ffffffffa05b346d
kernel: Call Trace:
kernel: [<ffffffffa05bdd8d>] lio_tpg_close_session+0xd/0x10 
[iscsi_target_mod]
kernel: [<ffffffffa04c014c>] target_release_session+0x1c/0x20 
[target_core_mod]
kernel: [<ffffffffa04c0a09>] target_put_session+0x19/0x20 [target_core_mod]
kernel: [<ffffffffa05b346d>] iscsit_logout_post_handler+0x9d/0x140 
[iscsi_target_mod]
kernel: [<ffffffffa0545e5f>] isert_do_control_comp+0xcf/0x120 [ib_isert]
kernel: [<ffffffff8106a736>] process_one_work+0x136/0x3a0
kernel: [<ffffffff8106aab7>] worker_thread+0x117/0x3d0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106f5fe>] kthread+0xce/0xf0
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: [<ffffffff815515d2>] ret_from_fork+0x42/0x70
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: Code: 00 00 00 48 89 df e8 c8 3f ff ff e9 f7 fe ff ff 8b b7 b8 
00 00 00 48 8b 97 f0 01 00 00 31 c0 48 c7 c7 a8 a1 5c a0 e8 25 9b f9 e0 
<0f> 0b eb fe 48 c7 c6 08 a2 5c a0 48 c7 c7 20 15 5d a0 31 c0 e8
kernel: RIP  [<ffffffffa05b36d8>] iscsit_close_session+0x1c8/0x230 
[iscsi_target_mod]
kernel: RSP <ffff8803e12f7d38>
kernel: ---[ end trace a25d406905884052 ]---
kernel: qla2xxx(E) scsi_transport_fc(E) [last unloaded: uio]
kernel: CPU: 14 PID: 30286 Comm: kworker/u52:6 Tainted: G            E 
  4.1.0-rc1+ #44
kernel: Hardware name: Supermicro SYS-1027R-WRF/X9DRW, BIOS 3.0a 08/08/2013
kernel: Workqueue: isert_comp_wq isert_do_control_comp [ib_isert]
kernel: task: ffff88046f618e50 ti: ffff8803e12f4000 task.ti: 
ffff8803e12f4000
kernel: RIP: 0010:[<ffffffffa05b36d8>]  [<ffffffffa05b36d8>] 
iscsit_close_session+0x1c8/0x230 [iscsi_target_mod]
kernel: RSP: 0018:ffff8803e12f7d38  EFLAGS: 00010296
kernel: RAX: 0000000000000053 RBX: ffff8803db173c00 RCX: 000000000000209c
kernel: RDX: 0000000000000001 RSI: 0000000000000282 RDI: ffffffff81a3b8f0
kernel: RBP: ffff8803e12f7d58 R08: 0000000000000000 R09: ffffffff81d1facb
kernel: R10: 000000000003fa34 R11: 0000000000000053 R12: ffff8803db171000
kernel: R13: ffff88046b99c000 R14: 0000000000000000 R15: ffff88046fad8e05
kernel: FS:  0000000000000000(0000) GS:ffff88047fd00000(0000) 
knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 0000003c8f66e940 CR3: 0000000001a0e000 CR4: 00000000000406e0
kernel: Stack:
kernel: ffff8803e12f7d88 ffff8803db173c00 ffff8803db171000 0000000000000000
kernel: ffff8803e12f7d68 ffffffffa05bdd8d ffff8803e12f7d78 ffffffffa04c014c
kernel: ffff8803e12f7d88 ffffffffa04c0a09 ffff8803e12f7db8 ffffffffa05b346d
kernel: Call Trace:
kernel: [<ffffffffa05bdd8d>] lio_tpg_close_session+0xd/0x10 
[iscsi_target_mod]
kernel: [<ffffffffa04c014c>] target_release_session+0x1c/0x20 
[target_core_mod]
kernel: [<ffffffffa04c0a09>] target_put_session+0x19/0x20 [target_core_mod]
kernel: [<ffffffffa05b346d>] iscsit_logout_post_handler+0x9d/0x140 
[iscsi_target_mod]
kernel: [<ffffffffa0545e5f>] isert_do_control_comp+0xcf/0x120 [ib_isert]
kernel: [<ffffffff8106a736>] process_one_work+0x136/0x3a0
kernel: [<ffffffff8106aab7>] worker_thread+0x117/0x3d0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106f5fe>] kthread+0xce/0xf0
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: [<ffffffff815515d2>] ret_from_fork+0x42/0x70
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: Code: 00 00 00 48 89 df e8 c8 3f ff ff e9 f7 fe ff ff 8b b7 b8 
00 00 00 48 8b 97 f0 01 00 00 31 c0 48 c7 c7 a8 a1 5c a0 e8 25 9b f9 e0 
<0f> 0b eb fe 48 c7 c6 08 a2 5c a0 48 c7 c7 20 15 5d a0 31 c0 e8
kernel: RIP  [<ffffffffa05b36d8>] iscsit_close_session+0x1c8/0x230 
[iscsi_target_mod]
kernel: RSP <ffff8803e12f7d38>
kernel: ---[ end trace a25d406905884052 ]---
kernel: isert: isert_cq_comp_err: conn ffff8803df09c800 completing 
wait_comp_err
kernel: isert: isert_wait4logout: conn ffff8803df09c800
kernel: isert: isert_wait4logout: conn ffff8803df09c800 wait for 
conn_logout_comp
kernel: isert: isert_release_work: Starting release conn ffff8803df09c800
kernel: isert: isert_release_work: Destroying conn ffff8803df09c800
kernel: isert: isert_release_kref: conn ffff8803df09c800 final kref 
kworker/u50:3/30347
kernel: isert: isert_conn_free_fastreg_pool: Freeing conn 
ffff8803df09c800 fastreg pool
kernel: BUG: unable to handle kernel paging request at ffffffffffffffd8
kernel: IP: [<ffffffff8106ef5b>] kthread_data+0xb/0x20
kernel: PGD 1a0f067 PUD 1a11067 PMD 0
kernel: Oops: 0000 [#2] SMP
kernel: Modules linked in: ib_umad ib_ipoib mlx5_ib mlx5_core 
target_core_user uio tcm_loop vhost_scsi tcm_qla2xxx ib_srpt ib_isert 
iscsi_target_mod tcm_fc target_core_file target_core_iblock 
target_core_pscsi target_core_mod vhost rdma_cm ib_cm iw_cm ib_sa ib_mad 
ib_core ib_addr libfc netconsole configfs nfsd exportfs nfsv3 nfs_acl 
rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace autofs4 sunrpc 
cpufreq_ondemand ipv6 ext4 jbd2 dm_mirror dm_region_hash dm_log uinput 
iTCO_wdt iTCO_vendor_support microcode serio_raw pcspkr sb_edac 
edac_core sg ipmi_si ipmi_msghandler acpi_cpufreq i2c_i801 lpc_ich 
mfd_core shpchp ioatdma dm_mod igb dca i2c_algo_bit i2c_core ptp 
pps_core wmi ext3(E) jbd(E) mbcache(E) sd_mod(E) ahci(E) libahci(E) 
isci(E) libsas(E) scsi_transport_sas(E) qla2xxx(E) scsi_transport_fc(E) 
[last unloaded: uio]
kernel: CPU: 4 PID: 30286 Comm: kworker/u52:6 Tainted: G      D     E 
4.1.0-rc1+ #44
kernel: Hardware name: Supermicro SYS-1027R-WRF/X9DRW, BIOS 3.0a 08/08/2013
kernel: task: ffff88046f618e50 ti: ffff8803e12f4000 task.ti: 
ffff8803e12f4000
kernel: RIP: 0010:[<ffffffff8106ef5b>]  [<ffffffff8106ef5b>] 
kthread_data+0xb/0x20
kernel: RSP: 0018:ffff8803e12f79d8  EFLAGS: 00010096
kernel: RAX: 0000000000000000 RBX: 0000000000000004 RCX: ffffffff81cdcc60
kernel: RDX: ffff88046f618e50 RSI: 0000000000000004 RDI: ffff88046f618e50
kernel: RBP: ffff8803e12f79d8 R08: ffff88046f618ee0 R09: dead000000200200
kernel: R10: dead000000200200 R11: 0000000000000007 R12: 0000000000000004
kernel: R13: ffff88046f619798 R14: 0000000000000001 R15: 0000000000000004
kernel: FS:  0000000000000000(0000) GS:ffff88047fc80000(0000) 
knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 0000000000000028 CR3: 0000000001a0e000 CR4: 00000000000406e0
kernel: Stack:
kernel: ffff8803e12f79f8 ffffffff81068710 ffff8803e12f79f8 ffff88047fc94a80
kernel: ffff8803e12f7a48 ffffffff8154d973 ffff88046f618e50 ffff8803e12f7a38
kernel: ffff88046f6196c8 ffff8803e12f4008 000000000000000b ffff88046f6199a4
kernel: Call Trace:
kernel: [<ffffffff81068710>] wq_worker_sleeping+0x10/0xa0
kernel: [<ffffffff8154d973>] __schedule+0x503/0x710
kernel: [<ffffffff8154dcd9>] schedule+0x39/0x90
kernel: [<ffffffff8105721a>] do_exit+0x2ca/0x470
kernel: [<ffffffff8109e0ec>] ? kmsg_dump+0x9c/0xc0
kernel: [<ffffffff810071fb>] oops_end+0x9b/0xe0
kernel: [<ffffffff81007626>] die+0x56/0x90
kernel: [<ffffffff81004011>] do_trap+0x161/0x170
kernel: [<ffffffff8107020d>] ? __atomic_notifier_call_chain+0xd/0x10
kernel: [<ffffffff81004547>] do_error_trap+0xb7/0x100
kernel: [<ffffffffa05b36d8>] ? iscsit_close_session+0x1c8/0x230 
[iscsi_target_mod]
kernel: [<ffffffff810927d2>] ? down_trylock+0x32/0x50
kernel: [<ffffffff8109ea30>] ? console_trylock+0x10/0x50
kernel: [<ffffffff8100464b>] do_invalid_op+0x1b/0x20
kernel: [<ffffffff81552498>] invalid_op+0x18/0x20
kernel: [<ffffffffa05b36d8>] ? iscsit_close_session+0x1c8/0x230 
[iscsi_target_mod]
kernel: [<ffffffffa05b36d8>] ? iscsit_close_session+0x1c8/0x230 
[iscsi_target_mod]
kernel: [<ffffffffa05bdd8d>] lio_tpg_close_session+0xd/0x10 
[iscsi_target_mod]
kernel: [<ffffffffa04c014c>] target_release_session+0x1c/0x20 
[target_core_mod]
kernel: [<ffffffffa04c0a09>] target_put_session+0x19/0x20 [target_core_mod]
kernel: [<ffffffffa05b346d>] iscsit_logout_post_handler+0x9d/0x140 
[iscsi_target_mod]
kernel: [<ffffffffa0545e5f>] isert_do_control_comp+0xcf/0x120 [ib_isert]
kernel: [<ffffffff8106a736>] process_one_work+0x136/0x3a0
kernel: [<ffffffff8106aab7>] worker_thread+0x117/0x3d0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106a9a0>] ? process_one_work+0x3a0/0x3a0
kernel: [<ffffffff8106f5fe>] kthread+0xce/0xf0
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: [<ffffffff815515d2>] ret_from_fork+0x42/0x70
kernel: [<ffffffff8106f530>] ? kthread_freezable_should_stop+0x70/0x70
kernel: Code: 00 48 89 e5 48 8b 40 c8 c9 48 c1 e8 02 83 e0 01 c3 66 66 
66 66 66 66 2e 0f 1f 84 00 00 00 00 00 48 8b 87 f0 08 00 00 55 48 89 e5 
<48> 8b 40 d8 c9 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
kernel: RIP  [<ffffffff8106ef5b>] kthread_data+0xb/0x20
kernel: RSP <ffff8803e12f79d8>
kernel: CR2: ffffffffffffffd8
kernel: ---[ end trace a25d406905884053 ]---
kernel: Fixing recursive fault but reboot is needed!

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas A. Bellinger July 24, 2015, 3:11 a.m. UTC | #3
Hey Sagi,

Apologies for the extended delay to follow-up on this..

On Tue, 2015-07-07 at 15:04 +0300, Sagi Grimberg wrote:
> On 7/7/2015 12:01 PM, Nicholas A. Bellinger wrote:
> > Hey Sagi,
> >
> > This addresses a regression with traditional iscsi-target that I noticed
> > recently, but has not been tested with iser-target yet.
> >
> > Would you mind taking a quick spin with iser-target to verify, and try
> > to intentionally fail iscsit_start_kthreads() into the two out_bitmap +
> > out_tx error paths..?
> 
> Umm, I hit a BUG() statement while start to test this.
> 
> The easy reproducer is:
> - create 20 iser targets (listen on any 0.0.0.0:3260)
> - discover targets 2 initiator ports ports (40 target instances)
> - run: for i in `seq 100`; do iscsiadm -m node -l && iscsiadm -m node 
> -u; done
> 
> Note: I wasn't able to reproduce this with iscsi/tcp.
> 
> Stack dump:
> kernel: isert: isert_wait4logout: conn ffff8803db5a9800
> kernel: ------------[ cut here ]------------
> kernel: isert: isert_wait4logout: conn ffff8803db5a9800 wait for conn_logout_comp
> kernel: kernel BUG at drivers/target/iscsi/iscsi_target.c:4465!

So the proper resolution for this ended up involving moving the call to
iscsi_start_kthreads() immediately preceding sending the last login
response.

This is to ensure that once the last login response is sent signaling
transition to full-feature-phase, no resource allocation failures should
ever occur.

I've verified this with iscsi/tcp and iser-target, and AFAICT the
failure cases for iscsi_start_kthreads() are now working as expected.

The updated version of this was just sent out as patch #2 in the v4.2-rc
fixes series, and has been pushed to target-pending/master here:

iscsi-target: Fix iscsit_start_kthreads failure OOPs
https://git.kernel.org/cgit/linux/kernel/git/nab/target-pending.git/commit/?id=7238eef0914764211a89c069eef569bc83cb3714

Please review.

--nab




--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 4e68b62..66655b7 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -3998,7 +3998,8 @@  get_immediate:
 	}
 
 transport_err:
-	iscsit_take_action_for_connection_exit(conn);
+	if (conn->rx_thread_active)
+		iscsit_take_action_for_connection_exit(conn);
 out:
 	return 0;
 }
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 3d0fe4f..76dedb6 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -679,6 +679,7 @@  static int iscsit_start_kthreads(struct iscsi_conn *conn)
 
 	return 0;
 out_tx:
+	send_sig(SIGINT, conn->tx_thread, 1);
 	kthread_stop(conn->tx_thread);
 	conn->tx_thread_active = false;
 out_bitmap:
@@ -689,6 +690,23 @@  out_bitmap:
 	return ret;
 }
 
+static void iscsit_kthread_err(struct iscsi_session *sess,
+			       struct iscsi_conn *conn, bool new_sess)
+{
+	spin_lock_bh(&sess->conn_lock);
+	list_del(&conn->conn_list);
+	if (atomic_dec_and_test(&sess->nconn))
+		sess->session_state = TARG_SESS_STATE_FAILED;
+	spin_unlock_bh(&sess->conn_lock);
+
+	if (!new_sess)
+		return;
+
+	transport_deregister_session_configfs(sess->se_sess);
+	transport_deregister_session(sess->se_sess);
+	sess->se_sess = NULL;
+}
+
 int iscsi_post_login_handler(
 	struct iscsi_np *np,
 	struct iscsi_conn *conn,
@@ -740,8 +758,10 @@  int iscsi_post_login_handler(
 		spin_unlock_bh(&sess->conn_lock);
 
 		rc = iscsit_start_kthreads(conn);
-		if (rc)
-			return rc;
+		if (rc) {
+			iscsit_kthread_err(sess, conn, false);
+			goto out_old_sess;
+		}
 
 		iscsi_post_login_start_timers(conn);
 		/*
@@ -751,7 +771,7 @@  int iscsi_post_login_handler(
 		iscsit_thread_get_cpumask(conn);
 		conn->conn_rx_reset_cpumask = 1;
 		conn->conn_tx_reset_cpumask = 1;
-
+out_old_sess:
 		iscsit_dec_conn_usage_count(conn);
 		if (stop_timer) {
 			spin_lock_bh(&se_tpg->session_lock);
@@ -759,7 +779,7 @@  int iscsi_post_login_handler(
 			spin_unlock_bh(&se_tpg->session_lock);
 		}
 		iscsit_dec_session_usage_count(sess);
-		return 0;
+		return rc;
 	}
 
 	iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1);
@@ -801,8 +821,17 @@  int iscsi_post_login_handler(
 	spin_unlock_bh(&se_tpg->session_lock);
 
 	rc = iscsit_start_kthreads(conn);
-	if (rc)
+	if (rc) {
+		spin_lock_bh(&se_tpg->session_lock);
+		if (tpg->tpg_tiqn)
+			tpg->tpg_tiqn->tiqn_nsessions--;
+		tpg->nsessions--;
+		spin_unlock_bh(&se_tpg->session_lock);
+
+		iscsit_kthread_err(sess, conn, true);
+		iscsit_dec_conn_usage_count(conn);
 		return rc;
+	}
 
 	iscsi_post_login_start_timers(conn);
 	/*