diff mbox

[11/33] target: Avoid that parsing an XCOPY command triggers a deadlock

Message ID 20170523234854.21452-12-bart.vanassche@sandisk.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bart Van Assche May 23, 2017, 11:48 p.m. UTC
Move the target_depend_item() call out of the critical section.
This patch avoids that lockdep reports the following:

Comments

Nicholas A. Bellinger June 2, 2017, 4:37 a.m. UTC | #1
On Tue, 2017-05-23 at 16:48 -0700, Bart Van Assche wrote:
> Move the target_depend_item() call out of the critical section.
> This patch avoids that lockdep reports the following:
> 
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 4.10.0-rc4-debug+ #1 Not tainted
> -------------------------------------------------------
> rmdir/1799 is trying to acquire lock:
>  (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> 
> but task is already holding lock:
>  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (&sb->s_type->i_mutex_key#15){++++++}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167eacf>] down_write+0x3f/0x70
> [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
> [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
> [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #1 (g_device_mutex){+.+...}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #0 (&sess->cmdsn_mutex){+.+.+.}:
> 
> [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
> [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
> [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
> [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
> [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
> [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
> [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
> [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
> [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
> [<ffffffff811cb404>] do_rmdir+0x1f4/0x200
> [<ffffffff811cc131>] SyS_rmdir+0x11/0x20
> [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&sb->s_type->i_mutex_key#15);
>                                lock(g_device_mutex);
>                                lock(&sb->s_type->i_mutex_key#15);
>   lock(&sess->cmdsn_mutex);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by rmdir/1799:
>  #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50
>  #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200
>  #2:  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> stack backtrace:
> CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
> Call Trace:
>  dump_stack+0x86/0xc3
>  print_circular_bug+0x1c7/0x220
>  __lock_acquire+0x10e6/0x1260
>  lock_acquire+0x71/0x90
>  mutex_lock_nested+0x5f/0x670
>  iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
>  iscsit_close_session+0xac/0x200 [iscsi_target_mod]
>  lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
>  target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
>  core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
>  target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
>  config_item_release+0x5a/0xc0 [configfs]
>  config_item_put+0x1d/0x1f [configfs]
>  configfs_rmdir+0x1a6/0x300 [configfs]
>  vfs_rmdir+0xb7/0x140
>  do_rmdir+0x1f4/0x200
>  SyS_rmdir+0x11/0x20
>  entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: David Disseldorp <ddiss@suse.de>
> ---
>  drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------
>  1 file changed, 23 insertions(+), 20 deletions(-)
> 

AFAICT with patch #8 in place to do the target_xcopy_locate_se_dev_e4()
outside of target_do_xcopy() while iscsi-target code is holding
sess->cmdsn_mutex, this patch along with patch #9 and #10 are
unnecessary.

Dropping them.

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Christie June 13, 2017, 11:21 p.m. UTC | #2
On 05/23/2017 06:48 PM, Bart Van Assche wrote:
> Move the target_depend_item() call out of the critical section.
> This patch avoids that lockdep reports the following:
> 
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 4.10.0-rc4-debug+ #1 Not tainted
> -------------------------------------------------------
> rmdir/1799 is trying to acquire lock:
>  (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> 
> but task is already holding lock:
>  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (&sb->s_type->i_mutex_key#15){++++++}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167eacf>] down_write+0x3f/0x70
> [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
> [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
> [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #1 (g_device_mutex){+.+...}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #0 (&sess->cmdsn_mutex){+.+.+.}:
> 
> [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
> [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
> [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
> [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
> [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
> [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
> [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
> [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
> [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
> [<ffffffff811cb404>] do_rmdir+0x1f4/0x200
> [<ffffffff811cc131>] SyS_rmdir+0x11/0x20
> [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&sb->s_type->i_mutex_key#15);
>                                lock(g_device_mutex);
>                                lock(&sb->s_type->i_mutex_key#15);
>   lock(&sess->cmdsn_mutex);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by rmdir/1799:
>  #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50
>  #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200
>  #2:  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> stack backtrace:
> CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
> Call Trace:
>  dump_stack+0x86/0xc3
>  print_circular_bug+0x1c7/0x220
>  __lock_acquire+0x10e6/0x1260
>  lock_acquire+0x71/0x90
>  mutex_lock_nested+0x5f/0x670
>  iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
>  iscsit_close_session+0xac/0x200 [iscsi_target_mod]
>  lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
>  target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
>  core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
>  target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
>  config_item_release+0x5a/0xc0 [configfs]
>  config_item_put+0x1d/0x1f [configfs]
>  configfs_rmdir+0x1a6/0x300 [configfs]
>  vfs_rmdir+0xb7/0x140
>  do_rmdir+0x1f4/0x200
>  SyS_rmdir+0x11/0x20
>  entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: David Disseldorp <ddiss@suse.de>

I tested the patch here while doing lun unmap + backend device removals
at the same time as xcopys, and it works as expected for me.

Reviewed-by: Mike Christie <mchristi@redhat.com>

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Christie June 16, 2017, 12:10 a.m. UTC | #3
On 05/23/2017 06:48 PM, Bart Van Assche wrote:
> diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
> index 56738a41e346..27414ab01b24 100644
> --- a/drivers/target/target_core_xcopy.c
> +++ b/drivers/target/target_core_xcopy.c
> @@ -62,6 +62,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
>  	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
>  	int rc;
>  
> +	*found_dev = NULL;
> +
>  	mutex_lock(&g_device_mutex);
>  	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
>  
> @@ -71,32 +73,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
>  		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
>  		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
>  
> -		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
> -		if (rc != 0)
> +		if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
>  			continue;
>  
> -		*found_dev = se_dev;
> -		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
> +		if (target_get_device(se_dev))

Hey Bart,

I think there is one small problem. Maybe not necessarily with this
patch but the change in it exposes it.

If after we do a successful target_get_device a user app deletes the
backend device (does rmdir
/sys/kernel/config/target/core/fileio_1/somedevice), rmdir will will
return success immediately since we have a done a get above. The app
will then do rmdir on (/sys/kernel/config/target/core/fileio_1).

The problem is that if the second rmdir on fileio_1 runs before
target_put_device below is done, then core_delete_hba will hit the
WARN_ON(hba->dev_count). Note that in this test  target_depend_item will
fail because the first rmdir on "somedevice" has already succeeded.

This does not happen without your patch, because the first rmdir on
/sys/kernel/config/target/core/fileio_1/somedevice would drop the
refcount to zero and the removing thread would either run
target_free_device before target_xcopy_locate_se_dev_e4 can grab the
g_device_mutex. Or, if target_xcopy_locate_se_dev_e4 grabs the
g_device_mutex firs, then target_depend_item will fail since because we
already deleting the device.

> +			*found_dev = se_dev;
> +		break;
> +	}
> +	mutex_unlock(&g_device_mutex);
>  
> -		rc = target_depend_item(&se_dev->dev_group.cg_item);
> -		if (rc != 0) {
> -			pr_err("configfs_depend_item attempt failed:"
> -				" %d for se_dev: %p\n", rc, se_dev);
> -			mutex_unlock(&g_device_mutex);
> -			return rc;
> -		}
> +	if (*found_dev == NULL) {
> +		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
> +		return -EINVAL;
> +	}
>  
> -		pr_debug("Called configfs_depend_item for se_dev: %p"
> -			" se_dev->se_dev_group: %p\n", se_dev,
> -			&se_dev->dev_group);
> +	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
>  
> -		mutex_unlock(&g_device_mutex);
> -		return 0;
> -	}
> -	mutex_unlock(&g_device_mutex);
> +	rc = target_depend_item(&se_dev->dev_group.cg_item);
> +	if (rc != 0)
> +		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
> +		       rc, se_dev);
> +	else
> +		pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
> +			 se_dev, &se_dev->dev_group);
>  
> -	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
> -	return -EINVAL;
> +	target_put_device(se_dev);
> +
> +	return rc;
>  }
>  
>  static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
> 

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Christie June 16, 2017, 6:50 a.m. UTC | #4
On 05/23/2017 06:48 PM, Bart Van Assche wrote:
> Move the target_depend_item() call out of the critical section.
> This patch avoids that lockdep reports the following:
> 
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 4.10.0-rc4-debug+ #1 Not tainted
> -------------------------------------------------------
> rmdir/1799 is trying to acquire lock:
>  (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> 
> but task is already holding lock:
>  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (&sb->s_type->i_mutex_key#15){++++++}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167eacf>] down_write+0x3f/0x70
> [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
> [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
> [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #1 (g_device_mutex){+.+...}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #0 (&sess->cmdsn_mutex){+.+.+.}:
> 
> [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
> [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
> [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
> [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
> [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
> [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
> [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
> [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
> [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
> [<ffffffff811cb404>] do_rmdir+0x1f4/0x200
> [<ffffffff811cc131>] SyS_rmdir+0x11/0x20
> [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&sb->s_type->i_mutex_key#15);
>                                lock(g_device_mutex);
>                                lock(&sb->s_type->i_mutex_key#15);
>   lock(&sess->cmdsn_mutex);
> 


Hey Bart, is the g_device_mutex an issue here? It is not clear to me.

It looks like the problem that is reported is that:

iscsit_sequence_cmd will grab the cmdsn mutex and then if it is a xcopy
command we can end up calling configfs_depend_item which grabs the
configfs/fs lock.

And, if you remove a acl, it is saying we grab the same configfs/fs lock
and then grab the cmdsn_mutex during cleanup, so here we have a issue.
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bart Van Assche June 16, 2017, 3:18 p.m. UTC | #5
On Thu, 2017-06-15 at 19:10 -0500, Mike Christie wrote:
> I think there is one small problem. Maybe not necessarily with this
> patch but the change in it exposes it.
> 
> If after we do a successful target_get_device a user app deletes the
> backend device (does rmdir
> /sys/kernel/config/target/core/fileio_1/somedevice), rmdir will will
> return success immediately since we have a done a get above. The app
> will then do rmdir on (/sys/kernel/config/target/core/fileio_1).
> 
> The problem is that if the second rmdir on fileio_1 runs before
> target_put_device below is done, then core_delete_hba will hit the
> WARN_ON(hba->dev_count). Note that in this test  target_depend_item will
> fail because the first rmdir on "somedevice" has already succeeded.
> 
> This does not happen without your patch, because the first rmdir on
> /sys/kernel/config/target/core/fileio_1/somedevice would drop the
> refcount to zero and the removing thread would either run
> target_free_device before target_xcopy_locate_se_dev_e4 can grab the
> g_device_mutex. Or, if target_xcopy_locate_se_dev_e4 grabs the
> g_device_mutex firs, then target_depend_item will fail since because we
> already deleting the device.

Hello Mike,

Had you noticed that due to patch "target: Fix a deadlock between the XCOPY
code and iSCSI session shutdown" (commit 06546e842c24 in Nic's for-next
branch) this patch is no longer necessary? Please let me know if you would
need this patch anyway for another purpose than for fixing the deadlock
explained in the description of this patch.

Bart.--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

======================================================
[ INFO: possible circular locking dependency detected ]
4.10.0-rc4-debug+ #1 Not tainted
-------------------------------------------------------
rmdir/1799 is trying to acquire lock:
 (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]

but task is already holding lock:
 (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&sb->s_type->i_mutex_key#15){++++++}:

[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167eacf>] down_write+0x3f/0x70
[<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
[<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
[<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
[<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
[<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
[<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
[<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
[<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
[<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
[<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
[<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
[<ffffffff810828c2>] kthread+0x102/0x140
[<ffffffff81681b21>] ret_from_fork+0x31/0x40

-> #1 (g_device_mutex){+.+...}:

[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
[<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
[<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
[<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
[<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
[<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
[<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
[<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
[<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
[<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
[<ffffffff810828c2>] kthread+0x102/0x140
[<ffffffff81681b21>] ret_from_fork+0x31/0x40

-> #0 (&sess->cmdsn_mutex){+.+.+.}:

[<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
[<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
[<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
[<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
[<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
[<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
[<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
[<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
[<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
[<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
[<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
[<ffffffff811cb404>] do_rmdir+0x1f4/0x200
[<ffffffff811cc131>] SyS_rmdir+0x11/0x20
[<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6

other info that might help us debug this:

Chain exists of:
  &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&sb->s_type->i_mutex_key#15);
                               lock(g_device_mutex);
                               lock(&sb->s_type->i_mutex_key#15);
  lock(&sess->cmdsn_mutex);

 *** DEADLOCK ***

3 locks held by rmdir/1799:
 #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50
 #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200
 #2:  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140

stack backtrace:
CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0x86/0xc3
 print_circular_bug+0x1c7/0x220
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
---
 drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 56738a41e346..27414ab01b24 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -62,6 +62,8 @@  static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
+	*found_dev = NULL;
+
 	mutex_lock(&g_device_mutex);
 	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
 
@@ -71,32 +73,33 @@  static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
 		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
 
-		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
-		if (rc != 0)
+		if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
 			continue;
 
-		*found_dev = se_dev;
-		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
+		if (target_get_device(se_dev))
+			*found_dev = se_dev;
+		break;
+	}
+	mutex_unlock(&g_device_mutex);
 
-		rc = target_depend_item(&se_dev->dev_group.cg_item);
-		if (rc != 0) {
-			pr_err("configfs_depend_item attempt failed:"
-				" %d for se_dev: %p\n", rc, se_dev);
-			mutex_unlock(&g_device_mutex);
-			return rc;
-		}
+	if (*found_dev == NULL) {
+		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+		return -EINVAL;
+	}
 
-		pr_debug("Called configfs_depend_item for se_dev: %p"
-			" se_dev->se_dev_group: %p\n", se_dev,
-			&se_dev->dev_group);
+	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
-		mutex_unlock(&g_device_mutex);
-		return 0;
-	}
-	mutex_unlock(&g_device_mutex);
+	rc = target_depend_item(&se_dev->dev_group.cg_item);
+	if (rc != 0)
+		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
+		       rc, se_dev);
+	else
+		pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
+			 se_dev, &se_dev->dev_group);
 
-	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
-	return -EINVAL;
+	target_put_device(se_dev);
+
+	return rc;
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,