diff mbox

[v4,37/37] target: Avoid that XCOPY commands trigger a deadlock

Message ID 20170208222507.25715-38-bart.vanassche@sandisk.com (mailing list archive)
State Superseded
Headers show

Commit Message

Bart Van Assche Feb. 8, 2017, 10:25 p.m. UTC
Move the target_depend_item() call out of the critical section.
This patch avoids that lockdep reports the following:

Comments

Nicholas A. Bellinger Feb. 9, 2017, 11:34 a.m. UTC | #1
On Wed, 2017-02-08 at 14:25 -0800, Bart Van Assche wrote:
> Move the target_depend_item() call out of the critical section.
> This patch avoids that lockdep reports the following:
> 
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 4.10.0-rc4-debug+ #1 Not tainted
> -------------------------------------------------------
> rmdir/1799 is trying to acquire lock:
>  (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> 
> but task is already holding lock:
>  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> 
> -> #2 (&sb->s_type->i_mutex_key#15){++++++}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167eacf>] down_write+0x3f/0x70
> [<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
> [<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
> [<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #1 (g_device_mutex){+.+...}:
> 
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
> [<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
> [<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
> [<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
> [<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
> [<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
> [<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
> [<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
> [<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
> [<ffffffff810828c2>] kthread+0x102/0x140
> [<ffffffff81681b21>] ret_from_fork+0x31/0x40
> 
> -> #0 (&sess->cmdsn_mutex){+.+.+.}:
> 
> [<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
> [<ffffffff810b4731>] lock_acquire+0x71/0x90
> [<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
> [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
> [<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
> [<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
> [<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
> [<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
> [<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
> [<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
> [<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
> [<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
> [<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
> [<ffffffff811cb404>] do_rmdir+0x1f4/0x200
> [<ffffffff811cc131>] SyS_rmdir+0x11/0x20
> [<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> other info that might help us debug this:
> 
> Chain exists of:
>   &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15
> 
>  Possible unsafe locking scenario:
> 
>        CPU0                    CPU1
>        ----                    ----
>   lock(&sb->s_type->i_mutex_key#15);
>                                lock(g_device_mutex);
>                                lock(&sb->s_type->i_mutex_key#15);
>   lock(&sess->cmdsn_mutex);
> 
>  *** DEADLOCK ***
> 
> 3 locks held by rmdir/1799:
>  #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50
>  #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200
>  #2:  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140
> 
> stack backtrace:
> CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
> Call Trace:
>  dump_stack+0x86/0xc3
>  print_circular_bug+0x1c7/0x220
>  __lock_acquire+0x10e6/0x1260
>  lock_acquire+0x71/0x90
>  mutex_lock_nested+0x5f/0x670
>  iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
>  iscsit_close_session+0xac/0x200 [iscsi_target_mod]
>  lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
>  target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
>  core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
>  target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
>  config_item_release+0x5a/0xc0 [configfs]
>  config_item_put+0x1d/0x1f [configfs]
>  configfs_rmdir+0x1a6/0x300 [configfs]
>  vfs_rmdir+0xb7/0x140
>  do_rmdir+0x1f4/0x200
>  SyS_rmdir+0x11/0x20
>  entry_SYSCALL_64_fastpath+0x23/0xc6
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: David Disseldorp <ddiss@suse.de>
> ---
>  drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------
>  1 file changed, 23 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
> index 180d8d718db5..623835371f2f 100644
> --- a/drivers/target/target_core_xcopy.c
> +++ b/drivers/target/target_core_xcopy.c
> @@ -60,6 +60,8 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
>  	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
>  	int rc;
>  
> +	*found_dev = NULL;
> +
>  	mutex_lock(&g_device_mutex);
>  	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
>  
> @@ -69,32 +71,33 @@ static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
>  		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
>  		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
>  
> -		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
> -		if (rc != 0)
> +		if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
>  			continue;
>  
> -		*found_dev = se_dev;
> -		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
> +		if (target_get_device(se_dev))
> +			*found_dev = se_dev;
> +		break;
> +	}
> +	mutex_unlock(&g_device_mutex);
>  
> -		rc = target_depend_item(&se_dev->dev_group.cg_item);
> -		if (rc != 0) {
> -			pr_err("configfs_depend_item attempt failed:"
> -				" %d for se_dev: %p\n", rc, se_dev);
> -			mutex_unlock(&g_device_mutex);
> -			return rc;
> -		}
> +	if (*found_dev == NULL) {
> +		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
> +		return -EINVAL;
> +	}
>  
> -		pr_debug("Called configfs_depend_item for se_dev: %p"
> -			" se_dev->se_dev_group: %p\n", se_dev,
> -			&se_dev->dev_group);
> +	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
>  
> -		mutex_unlock(&g_device_mutex);
> -		return 0;
> -	}
> -	mutex_unlock(&g_device_mutex);
> +	rc = target_depend_item(&se_dev->dev_group.cg_item);
> +	if (rc != 0)
> +		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
> +		       rc, se_dev);
> +	else
> +		pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
> +			 se_dev, &se_dev->dev_group);
>  
> -	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
> -	return -EINVAL;
> +	target_put_device(se_dev);
> +
> +	return rc;
>  }
>  
>  static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,

In practice this, this deadlock has never triggered with EXTENDED_COPY
and active I/O session shutdown with iscsi-target.

I do think it's a reasonable approach to avoid it the potential of the
deadlock (or at least make deadlock happy), but #35 needs to get merged
first.

That said, ignoring this patch for now.

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

======================================================
[ INFO: possible circular locking dependency detected ]
4.10.0-rc4-debug+ #1 Not tainted
-------------------------------------------------------
rmdir/1799 is trying to acquire lock:
 (&sess->cmdsn_mutex){+.+.+.}, at: [<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]

but task is already holding lock:
 (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&sb->s_type->i_mutex_key#15){++++++}:

[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167eacf>] down_write+0x3f/0x70
[<ffffffffa025315a>] configfs_depend_item+0x3a/0xb0 [configfs]
[<ffffffffa0264053>] target_depend_item+0x13/0x20 [target_core_mod]
[<ffffffffa02891db>] target_xcopy_locate_se_dev_e4+0xdb/0x1a0 [target_core_mod]
[<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
[<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
[<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
[<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
[<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
[<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
[<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
[<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
[<ffffffff810828c2>] kthread+0x102/0x140
[<ffffffff81681b21>] ret_from_fork+0x31/0x40

-> #1 (g_device_mutex){+.+...}:

[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
[<ffffffffa0289123>] target_xcopy_locate_se_dev_e4+0x23/0x1a0 [target_core_mod]
[<ffffffffa0289e6c>] target_do_xcopy+0x31c/0x8b0 [target_core_mod]
[<ffffffffa027d9a2>] __target_execute_cmd+0x22/0xa0 [target_core_mod]
[<ffffffffa027e409>] target_execute_cmd.part.20+0x1e9/0x290 [target_core_mod]
[<ffffffffa027e4c3>] target_execute_cmd+0x13/0x20 [target_core_mod]
[<ffffffffa02dc2d2>] iscsit_execute_cmd+0x1d2/0x230 [iscsi_target_mod]
[<ffffffffa02e4acb>] iscsit_sequence_cmd+0x10b/0x190 [iscsi_target_mod]
[<ffffffffa02eb01d>] iscsit_get_rx_pdu+0x37d/0xcd0 [iscsi_target_mod]
[<ffffffffa02ec93e>] iscsi_target_rx_thread+0x6e/0xa0 [iscsi_target_mod]
[<ffffffff810828c2>] kthread+0x102/0x140
[<ffffffff81681b21>] ret_from_fork+0x31/0x40

-> #0 (&sess->cmdsn_mutex){+.+.+.}:

[<ffffffff810b41e6>] __lock_acquire+0x10e6/0x1260
[<ffffffff810b4731>] lock_acquire+0x71/0x90
[<ffffffff8167c49f>] mutex_lock_nested+0x5f/0x670
[<ffffffffa02dc45d>] iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
[<ffffffffa02ecc8c>] iscsit_close_session+0xac/0x200 [iscsi_target_mod]
[<ffffffffa02f27af>] lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
[<ffffffffa02796f3>] target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
[<ffffffffa0279cf1>] core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
[<ffffffffa026c8c0>] target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
[<ffffffffa02560ca>] config_item_release+0x5a/0xc0 [configfs]
[<ffffffffa025614d>] config_item_put+0x1d/0x1f [configfs]
[<ffffffffa02544d6>] configfs_rmdir+0x1a6/0x300 [configfs]
[<ffffffff811c5927>] vfs_rmdir+0xb7/0x140
[<ffffffff811cb404>] do_rmdir+0x1f4/0x200
[<ffffffff811cc131>] SyS_rmdir+0x11/0x20
[<ffffffff81681885>] entry_SYSCALL_64_fastpath+0x23/0xc6

other info that might help us debug this:

Chain exists of:
  &sess->cmdsn_mutex --> g_device_mutex --> &sb->s_type->i_mutex_key#15

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&sb->s_type->i_mutex_key#15);
                               lock(g_device_mutex);
                               lock(&sb->s_type->i_mutex_key#15);
  lock(&sess->cmdsn_mutex);

 *** DEADLOCK ***

3 locks held by rmdir/1799:
 #0:  (sb_writers#10){.+.+.+}, at: [<ffffffff811e059f>] mnt_want_write+0x1f/0x50
 #1:  (&default_group_class[depth - 1]#2/1){+.+.+.}, at: [<ffffffff811cb36e>] do_rmdir+0x15e/0x200
 #2:  (&sb->s_type->i_mutex_key#15){++++++}, at: [<ffffffff811c58c0>] vfs_rmdir+0x50/0x140

stack backtrace:
CPU: 1 PID: 1799 Comm: rmdir Not tainted 4.10.0-rc4-debug+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0x86/0xc3
 print_circular_bug+0x1c7/0x220
 __lock_acquire+0x10e6/0x1260
 lock_acquire+0x71/0x90
 mutex_lock_nested+0x5f/0x670
 iscsit_free_all_ooo_cmdsns+0x2d/0xb0 [iscsi_target_mod]
 iscsit_close_session+0xac/0x200 [iscsi_target_mod]
 lio_tpg_close_session+0x9f/0xb0 [iscsi_target_mod]
 target_shutdown_sessions+0xc3/0xd0 [target_core_mod]
 core_tpg_del_initiator_node_acl+0x91/0x140 [target_core_mod]
 target_fabric_nacl_base_release+0x20/0x30 [target_core_mod]
 config_item_release+0x5a/0xc0 [configfs]
 config_item_put+0x1d/0x1f [configfs]
 configfs_rmdir+0x1a6/0x300 [configfs]
 vfs_rmdir+0xb7/0x140
 do_rmdir+0x1f4/0x200
 SyS_rmdir+0x11/0x20
 entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: David Disseldorp <ddiss@suse.de>
---
 drivers/target/target_core_xcopy.c | 43 ++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 180d8d718db5..623835371f2f 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -60,6 +60,8 @@  static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 	unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
 	int rc;
 
+	*found_dev = NULL;
+
 	mutex_lock(&g_device_mutex);
 	list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
 
@@ -69,32 +71,33 @@  static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn,
 		memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
 		target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
 
-		rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
-		if (rc != 0)
+		if (memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN))
 			continue;
 
-		*found_dev = se_dev;
-		pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
+		if (target_get_device(se_dev))
+			*found_dev = se_dev;
+		break;
+	}
+	mutex_unlock(&g_device_mutex);
 
-		rc = target_depend_item(&se_dev->dev_group.cg_item);
-		if (rc != 0) {
-			pr_err("configfs_depend_item attempt failed:"
-				" %d for se_dev: %p\n", rc, se_dev);
-			mutex_unlock(&g_device_mutex);
-			return rc;
-		}
+	if (*found_dev == NULL) {
+		pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+		return -EINVAL;
+	}
 
-		pr_debug("Called configfs_depend_item for se_dev: %p"
-			" se_dev->se_dev_group: %p\n", se_dev,
-			&se_dev->dev_group);
+	pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev);
 
-		mutex_unlock(&g_device_mutex);
-		return 0;
-	}
-	mutex_unlock(&g_device_mutex);
+	rc = target_depend_item(&se_dev->dev_group.cg_item);
+	if (rc != 0)
+		pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n",
+		       rc, se_dev);
+	else
+		pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n",
+			 se_dev, &se_dev->dev_group);
 
-	pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
-	return -EINVAL;
+	target_put_device(se_dev);
+
+	return rc;
 }
 
 static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,