Message ID | CAPcyv4jufPjoxKZ+82WBSEHs8VafEfZv1nozDsF29f45=0Us=A@mail.gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 04/29/2017 01:35 PM, Dan Williams wrote: > On Mon, Apr 24, 2017 at 8:13 AM, Yi Zhang <yizhan@redhat.com> wrote: >> Hello >> >> I reproduced ndctl blocked issue on 4.11.0-rc8, here is the reproduce steps and kernel log, could you help check it? Thanks. >> >> Reproduce steps: >> function pmem_btt_dax_switch() { >> sector_size_list="512 520 528 4096 4104 4160 4224" >> for sector_size in $sector_size_list; do >> ndctl create-namespace -f -e namespace${1}.0 --mode=sector -l $sector_size >> ndctl create-namespace -f -e namespace${1}.0 --mode=raw >> ndctl create-namespace -f -e namespace${1}.0 --mode=dax >> done >> } >> for i in 0 1 2 3; do >> pmem_btt_dax_switch $i & >> done > Thanks for the report! > > I couldn't run your script directly, do you have 4 memmap= regions > defined, or...? Here is my environment[1], pls try script [2] [1] # ndctl list -NB { "provider":"ACPI.NFIT", "dev":"ndbus0", "namespaces":[ { "dev":"namespace1.0", "mode":"dax", "size":8453619712, "uuid":"83bb25db-4a60-4613-a1c8-89d0f8f68c0d" }, { "dev":"namespace3.0", "mode":"dax", "size":8453619712, "uuid":"243ab8f4-1d29-43c3-b5f1-be2e470b3a85" }, { "dev":"namespace0.0", "mode":"dax", "size":8453619712, "uuid":"38d3a3ad-b6e2-4e4f-bf58-b793f9c039ea" }, { "dev":"namespace2.0", "mode":"dax", "size":8453619712, "uuid":"f1887ded-7f8b-4fe0-bacb-303f88584711" } ] } [2] #!/bin/bash function pmem_btt_switch() { sector_size_list="512 520 528 4096 4104 4160 4224" for sector_size in $sector_size_list; do ndctl create-namespace -f -e namespace${1}.0 --mode=sector -l $sector_size ndctl create-namespace -f -e namespace${1}.0 --mode=raw ndctl create-namespace -f -e namespace${1}.0 --mode=dax done } num=0 while [ $num -lt 500 ]; do for i in 0 1 2 3; do pmem_btt_switch $i & done wait ((num++)) done > I was able to find a locking problem with a debug patch that turned on > lockdep coverage for the device_lock(). Can you give the attached > patch a try to see if it resolves your lockup? > > This is against latest nvdimm.git/libnvdimm-for-next I tried the attached patch against bellow code[3], seems reproduced this issue again, pls check below log[4] [3] git clone -b libnvdimm-for-next git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm.git [4] [ 637.673117] nd_pmem btt3.0: No existing arenas [ 637.673118] nd_pmem btt1.0: No existing arenas [ 637.692493] pmem1s: detected capacity change from 0 to 7582678528 [ 637.694980] pmem0: detected capacity change from 0 to 8589934592 [ 637.778798] pmem3s: detected capacity change from 0 to 7582678528 [ 637.812557] pmem2: detected capacity change from 0 to 8589934592 [ 637.868725] nd_pmem btt0.0: No existing arenas [ 637.908019] pmem0s: detected capacity change from 0 to 7582678528 [ 637.951486] nd_pmem btt2.0: No existing arenas [ 637.978690] pmem2s: detected capacity change from 0 to 7582678528 [ 638.034491] pmem3: detected capacity change from 0 to 8589934592 [ 638.068756] pmem1: detected capacity change from 0 to 8589934592 [ 638.107573] pmem0: detected capacity change from 0 to 8589934592 [ 638.199041] pmem0: detected capacity change from 0 to 8589934592 [ 638.199041] pmem3: detected capacity change from 0 to 8589934592 [ 638.199098] pmem1: detected capacity change from 0 to 8589934592 [ 638.308711] pmem2: detected capacity change from 0 to 8589934592 [ 865.258436] INFO: task ndctl:21792 blocked for more than 120 seconds. [ 865.292282] Not tainted 4.11.0-rc4+ #1 [ 865.311971] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 865.347460] ndctl D 0 21792 21791 0x00000080 [ 865.371985] Call Trace: [ 865.382907] __schedule+0x289/0x8f0 [ 865.398556] schedule+0x36/0x80 [ 865.412599] schedule_preempt_disabled+0xe/0x10 [ 865.432906] __mutex_lock.isra.8+0x266/0x500 [ 865.452217] ? mntput+0x24/0x40 [ 865.466262] __mutex_lock_slowpath+0x13/0x20 [ 865.486238] mutex_lock+0x2f/0x40 [ 865.501470] region_size_show+0x20/0x70 [device_dax] [ 865.523976] dev_attr_show+0x20/0x50 [ 865.539997] ? mutex_lock+0x12/0x40 [ 865.555749] sysfs_kf_seq_show+0xbf/0x1a0 [ 865.573689] kernfs_seq_show+0x21/0x30 [ 865.590477] seq_read+0x115/0x390 [ 865.605305] ? do_filp_open+0xa5/0x100 [ 865.621728] kernfs_fop_read+0xff/0x180 [ 865.638917] __vfs_read+0x37/0x150 [ 865.653763] ? security_file_permission+0x9d/0xc0 [ 865.674791] vfs_read+0x8c/0x130 [ 865.688887] SyS_read+0x55/0xc0 [ 865.703026] do_syscall_64+0x67/0x180 [ 865.719498] entry_SYSCALL64_slow_path+0x25/0x25 [ 865.740944] RIP: 0033:0x7f1fcf5b47e0 [ 865.757730] RSP: 002b:00007ffca0272138 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 865.793826] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f1fcf5b47e0 [ 865.828479] RDX: 0000000000000400 RSI: 00007ffca0272160 RDI: 0000000000000004 [ 865.861314] RBP: 00000000012c54f0 R08: 00007f1fcf513988 R09: 0000000000000027 [ 865.893963] R10: 000000000000000a R11: 0000000000000246 R12: 00007ffca0272160 [ 865.926206] R13: 00000000012c8260 R14: 00007ffca0272160 R15: 00000000012c8343 [ 865.958196] INFO: task ndctl:21815 blocked for more than 120 seconds. [ 865.987044] Not tainted 4.11.0-rc4+ #1 [ 866.006393] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 866.041532] ndctl D 0 21815 21814 0x00000080 [ 866.066062] Call Trace: [ 866.077166] __schedule+0x289/0x8f0 [ 866.093101] schedule+0x36/0x80 [ 866.107348] schedule_preempt_disabled+0xe/0x10 [ 866.128397] __mutex_lock.isra.8+0x266/0x500 [ 866.150235] ? refcount_dec_and_test+0x11/0x20 [ 866.171204] ? wait_probe_show+0x70/0x70 [libnvdimm] [ 866.195722] __mutex_lock_slowpath+0x13/0x20 [ 866.215658] mutex_lock+0x2f/0x40 [ 866.231182] flush_regions_dimms+0x1b/0x40 [libnvdimm] [ 866.255614] device_for_each_child+0x50/0x90 [ 866.275489] wait_probe_show+0x46/0x70 [libnvdimm] [ 866.299264] dev_attr_show+0x20/0x50 [ 866.318946] ? mutex_lock+0x12/0x40 [ 866.335740] sysfs_kf_seq_show+0xbf/0x1a0 [ 866.354608] kernfs_seq_show+0x21/0x30 [ 866.372248] seq_read+0x115/0x390 [ 866.387589] ? do_filp_open+0xa5/0x100 [ 866.405202] kernfs_fop_read+0xff/0x180 [ 866.423176] __vfs_read+0x37/0x150 [ 866.439121] ? security_file_permission+0x9d/0xc0 [ 866.461188] vfs_read+0x8c/0x130 [ 866.476212] SyS_read+0x55/0xc0 [ 866.490824] do_syscall_64+0x67/0x180 [ 866.507892] entry_SYSCALL64_slow_path+0x25/0x25 [ 866.529750] RIP: 0033:0x7f362c9077e0 [ 866.546371] RSP: 002b:00007ffcc956d2c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 866.582011] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f362c9077e0 [ 866.615492] RDX: 0000000000000400 RSI: 00007ffcc956d2f0 RDI: 0000000000000003 [ 866.649106] RBP: 0000000000000000 R08: 00007f362c866988 R09: 0000000000000046 [ 866.682500] R10: 0000000000000046 R11: 0000000000000246 R12: 00007ffcc956d2f0 [ 866.715817] R13: 0000000000000000 R14: 0000000000001388 R15: 00007ffcc956d2f0 [ 866.749426] INFO: task ndctl:21818 blocked for more than 120 seconds. [ 866.779733] Not tainted 4.11.0-rc4+ #1 [ 866.800190] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 866.840889] ndctl D 0 21818 21816 0x00000080 [ 866.866559] Call Trace: [ 866.877997] __schedule+0x289/0x8f0 [ 866.894153] schedule+0x36/0x80 [ 866.909009] __kernfs_remove+0x169/0x220 [ 866.927326] ? remove_wait_queue+0x60/0x60 [ 866.946570] kernfs_remove_by_name_ns+0x43/0xa0 [ 866.967736] remove_files.isra.1+0x36/0x70 [ 866.986964] sysfs_remove_group+0x44/0x90 [ 867.005536] sysfs_remove_groups+0x2e/0x50 [ 867.025658] dax_region_unregister+0x21/0x40 [device_dax] [ 867.051102] devm_action_release+0xf/0x20 [ 867.069917] release_nodes+0x218/0x260 [ 867.087469] devres_release_all+0x3c/0x60 [ 867.106331] device_release_driver_internal+0x151/0x1f0 [ 867.130551] device_release_driver+0x12/0x20 [ 867.150939] unbind_store+0xba/0xe0 [ 867.167331] drv_attr_store+0x24/0x30 [ 867.184559] sysfs_kf_write+0x3a/0x50 [ 867.201805] kernfs_fop_write+0xff/0x180 [ 867.220111] __vfs_write+0x37/0x160 [ 867.236342] ? selinux_file_permission+0xe5/0x120 [ 867.258348] ? security_file_permission+0x3b/0xc0 [ 867.280189] vfs_write+0xb2/0x1b0 [ 867.295622] ? syscall_trace_enter+0x1d0/0x2b0 [ 867.317865] SyS_write+0x55/0xc0 [ 867.334608] do_syscall_64+0x67/0x180 [ 867.353717] entry_SYSCALL64_slow_path+0x25/0x25 [ 867.375047] RIP: 0033:0x7f6252c56840 [ 867.391768] RSP: 002b:00007ffe36b4cc38 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 867.427789] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f6252c56840 [ 867.461738] RDX: 0000000000000007 RSI: 0000000000b49ba0 RDI: 0000000000000003 [ 867.495312] RBP: 0000000000b49ba0 R08: 00007f6252bb5988 R09: 0000000000000046 [ 867.528377] R10: 00007ffe36b4c950 R11: 0000000000000246 R12: 0000000000000007 [ 867.561816] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000002 [ 867.595562] INFO: task ndctl:21819 blocked for more than 120 seconds. [ 867.625845] Not tainted 4.11.0-rc4+ #1 [ 867.645436] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 867.682041] ndctl D 0 21819 21817 0x00000080 [ 867.708043] Call Trace: [ 867.719637] __schedule+0x289/0x8f0 [ 867.736425] schedule+0x36/0x80 [ 867.751222] schedule_preempt_disabled+0xe/0x10 [ 867.772404] __mutex_lock.isra.8+0x266/0x500 [ 867.792365] ? refcount_dec_and_test+0x11/0x20 [ 867.813359] ? wait_probe_show+0x70/0x70 [libnvdimm] [ 867.838480] __mutex_lock_slowpath+0x13/0x20 [ 867.859706] mutex_lock+0x2f/0x40 [ 867.875377] flush_regions_dimms+0x1b/0x40 [libnvdimm] [ 867.899479] device_for_each_child+0x50/0x90 [ 867.919187] wait_probe_show+0x46/0x70 [libnvdimm] [ 867.940749] dev_attr_show+0x20/0x50 [ 867.957176] ? mutex_lock+0x12/0x40 [ 867.973416] sysfs_kf_seq_show+0xbf/0x1a0 [ 867.992291] kernfs_seq_show+0x21/0x30 [ 868.009818] seq_read+0x115/0x390 [ 868.025295] ? do_filp_open+0xa5/0x100 [ 868.042770] kernfs_fop_read+0xff/0x180 [ 868.060784] __vfs_read+0x37/0x150 [ 868.076791] ? security_file_permission+0x9d/0xc0 [ 868.100003] vfs_read+0x8c/0x130 [ 868.115039] SyS_read+0x55/0xc0 [ 868.129758] do_syscall_64+0x67/0x180 [ 868.146826] entry_SYSCALL64_slow_path+0x25/0x25 [ 868.168842] RIP: 0033:0x7f727cade7e0 [ 868.185547] RSP: 002b:00007ffc3adfdd98 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 868.220933] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f727cade7e0 [ 868.254185] RDX: 0000000000000400 RSI: 00007ffc3adfddc0 RDI: 0000000000000003 [ 868.287336] RBP: 000000000093b570 R08: 000000000093ea50 R09: 00007f727d3da310 [ 868.321130] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc3adfddc0 [ 868.356791] R13: 0000000000000000 R14: 0000000000001388 R15: 00007ffc3adfddc0
From c71720d805401ebfc5d53ed1c4241ec566554e60 Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams@intel.com> Date: Fri, 28 Apr 2017 22:05:14 -0700 Subject: [PATCH] libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering A debug patch to turn the standard device_lock() into something that lockdep can analyze yielded the following: ====================================================== [ INFO: possible circular locking dependency detected ] 4.11.0-rc4+ #106 Tainted: G O ------------------------------------------------------- lt-libndctl/1898 is trying to acquire lock: (&dev->nvdimm_mutex/3){+.+.+.}, at: [<ffffffffc023c948>] nd_attach_ndns+0x178/0x1b0 [libnvdimm] but task is already holding lock: (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [<ffffffffc022e0b1>] nvdimm_bus_lock+0x21/0x30 [libnvdimm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&nvdimm_bus->reconfig_mutex){+.+.+.}: lock_acquire+0xf6/0x1f0 __mutex_lock+0x88/0x980 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_namespace_capacity+0x1b/0x40 [libnvdimm] nvdimm_namespace_common_probe+0x230/0x510 [libnvdimm] nd_pmem_probe+0x14/0x180 [nd_pmem] nvdimm_bus_probe+0xa9/0x260 [libnvdimm] -> #0 (&dev->nvdimm_mutex/3){+.+.+.}: __lock_acquire+0x1107/0x1280 lock_acquire+0xf6/0x1f0 __mutex_lock+0x88/0x980 mutex_lock_nested+0x1b/0x20 nd_attach_ndns+0x178/0x1b0 [libnvdimm] nd_namespace_store+0x308/0x3c0 [libnvdimm] namespace_store+0x87/0x220 [libnvdimm] In this case '&dev->nvdimm_mutex/3' mirrors '&dev->mutex'. Fix this by replacing the use of device_lock() with nvdimm_bus_lock() to protect nd_{attach,detach}_ndns() operations. Cc: <stable@vger.kernel.org> Fixes: 8c2f7e8658df ("libnvdimm: infrastructure for btt devices") Reported-by: Yi Zhang <yizhan@redhat.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/nvdimm/btt_devs.c | 2 +- drivers/nvdimm/claim.c | 23 +++++++++++++++-------- drivers/nvdimm/dax_devs.c | 2 +- drivers/nvdimm/pfn_devs.c | 2 +- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 97dd2925ed6e..4b76af2b8715 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -314,7 +314,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) if (rc < 0) { struct nd_btt *nd_btt = to_nd_btt(btt_dev); - __nd_detach_ndns(btt_dev, &nd_btt->ndns); + nd_detach_ndns(btt_dev, &nd_btt->ndns); put_device(btt_dev); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 2c19bc7fc056..35b210dc1e56 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -21,8 +21,13 @@ void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) { struct nd_namespace_common *ndns = *_ndns; + struct nvdimm_bus *nvdimm_bus; - lockdep_assert_held(&ndns->dev.mutex); + if (!ndns) + return; + + nvdimm_bus = walk_to_nvdimm_bus(&ndns->dev); + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); dev_WARN_ONCE(dev, ndns->claim != dev, "%s: invalid claim\n", __func__); ndns->claim = NULL; *_ndns = NULL; @@ -37,18 +42,20 @@ void nd_detach_ndns(struct device *dev, if (!ndns) return; get_device(&ndns->dev); - device_lock(&ndns->dev); + nvdimm_bus_lock(&ndns->dev); __nd_detach_ndns(dev, _ndns); - device_unlock(&ndns->dev); + nvdimm_bus_unlock(&ndns->dev); put_device(&ndns->dev); } bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, struct nd_namespace_common **_ndns) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&attach->dev); + if (attach->claim) return false; - lockdep_assert_held(&attach->dev.mutex); + lockdep_assert_held(&nvdimm_bus->reconfig_mutex); dev_WARN_ONCE(dev, *_ndns, "%s: invalid claim\n", __func__); attach->claim = dev; *_ndns = attach; @@ -61,9 +68,9 @@ bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, { bool claimed; - device_lock(&attach->dev); + nvdimm_bus_lock(&attach->dev); claimed = __nd_attach_ndns(dev, attach, _ndns); - device_unlock(&attach->dev); + nvdimm_bus_unlock(&attach->dev); return claimed; } @@ -114,7 +121,7 @@ static void nd_detach_and_reset(struct device *dev, struct nd_namespace_common **_ndns) { /* detach the namespace and destroy / reset the device */ - nd_detach_ndns(dev, _ndns); + __nd_detach_ndns(dev, _ndns); if (is_idle(dev, *_ndns)) { nd_device_unregister(dev, ND_ASYNC); } else if (is_nd_btt(dev)) { @@ -184,7 +191,7 @@ ssize_t nd_namespace_store(struct device *dev, } WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); - if (!nd_attach_ndns(dev, ndns, _ndns)) { + if (!__nd_attach_ndns(dev, ndns, _ndns)) { dev_dbg(dev, "%s already claimed\n", dev_name(&ndns->dev)); len = -EBUSY; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 45fa82cae87c..c1b6556aea6e 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -124,7 +124,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) dev_dbg(dev, "%s: dax: %s\n", __func__, rc == 0 ? dev_name(dax_dev) : "<none>"); if (rc < 0) { - __nd_detach_ndns(dax_dev, &nd_pfn->ndns); + nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else __nd_device_register(dax_dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6c033c9a2f06..c38566f4da7d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -484,7 +484,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) dev_dbg(dev, "%s: pfn: %s\n", __func__, rc == 0 ? dev_name(pfn_dev) : "<none>"); if (rc < 0) { - __nd_detach_ndns(pfn_dev, &nd_pfn->ndns); + nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else __nd_device_register(pfn_dev); -- 2.9.3