Message ID | 20230620132703.20648-3-dwagner@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | More fixes for FC enabling | expand |
> When the host has enabled the udev/systemd autoconnect services for the > fc transport it interacts with blktests and make tests break. > > nvme-cli learned to ignore connects attemps when using the > --context command line option paired with a volatile configuration. Thus > we can mark all the resources created by blktests and avoid any > interaction with the systemd autoconnect scripts. Hmm... is this hapenning with non-fc as well? > > Signed-off-by: Daniel Wagner <dwagner@suse.de> > --- > tests/nvme/rc | 73 ++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 63 insertions(+), 10 deletions(-) > > diff --git a/tests/nvme/rc b/tests/nvme/rc > index 191f3e2e0c43..00117d314a38 100644 > --- a/tests/nvme/rc > +++ b/tests/nvme/rc > @@ -14,8 +14,8 @@ def_remote_wwnn="0x10001100aa000001" > def_remote_wwpn="0x20001100aa000001" > def_local_wwnn="0x10001100aa000002" > def_local_wwpn="0x20001100aa000002" > -def_hostnqn="$(cat /etc/nvme/hostnqn 2> /dev/null)" > -def_hostid="$(cat /etc/nvme/hostid 2> /dev/null)" > +def_hostnqn="nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8" > +def_hostid="242d4a24-2484-4a80-8234-d0169409c5e8" > nvme_trtype=${nvme_trtype:-"loop"} > nvme_img_size=${nvme_img_size:-"1G"} > nvme_num_iter=${nvme_num_iter:-"1000"} > @@ -161,6 +161,50 @@ _nvme_calc_rand_io_size() { > echo "${io_size_kb}k" > } > > +_nvme_cli_supports_context() { > + if ! nvme connect --help 2>&1 | grep -q context > /dev/null; then > + return 1 > + fi > + return 0 > +} Not a great way to check support. > + > +_setup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + mkdir -p /run/nvme > + cat >> /run/nvme/blktests.json <<-EOF > + [ > + { > + "hostnqn": "${def_hostnqn}", > + "hostid": "${def_hostid}", > + "subsystems": [ > + { > + "application": "blktests", > + "nqn": "blktests-subsystem-1", > + "ports": [ > + { > + "transport": "fc", > + "traddr": "nn-${def_remote_wwnn}:pn-${def_remote_wwpn}", > + "host_traddr": "nn-${def_local_wwnn}:pn-${def_local_wwpn}" > + } > + ] > + } > + ] > + } > + ] > + EOF > +} > + > +_cleanup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + rm -f /run/nvme/blktests.json > +} > + > _nvme_fcloop_add_rport() { > local local_wwnn="$1" > local local_wwpn="$2" > @@ -235,6 +279,8 @@ _cleanup_fcloop() { > _nvme_fcloop_del_lport "${local_wwnn}" "${local_wwpn}" > _nvme_fcloop_del_rport "${local_wwnn}" "${local_wwpn}" \ > "${remote_wwnn}" "${remote_wwpn}" > + > + _cleanup_nvme_cli > } > > _cleanup_nvmet() { > @@ -337,6 +383,8 @@ _setup_nvmet() { > def_host_traddr=$(printf "nn-%s:pn-%s" \ > "${def_local_wwnn}" \ > "${def_local_wwpn}") > + > + _setup_nvme_cli > fi > } > > @@ -436,18 +484,18 @@ _nvme_connect_subsys() { > trtype="$1" > subsysnqn="$2" > > - ARGS=(-t "${trtype}" -n "${subsysnqn}") > + ARGS=(-t "${trtype}" > + -n "${subsysnqn}" > + --hostnqn="${hostnqn}" > + --hostid="${hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" == "fc" ]] ; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then > ARGS+=(-a "${traddr}" -s "${trsvcid}") > fi > - if [[ "${hostnqn}" != "$def_hostnqn" ]]; then > - ARGS+=(--hostnqn="${hostnqn}") > - fi > - if [[ "${hostid}" != "$def_hostid" ]]; then > - ARGS+=(--hostid="${hostid}") > - fi > if [[ -n "${hostkey}" ]]; then > ARGS+=(--dhchap-secret="${hostkey}") > fi > @@ -482,7 +530,12 @@ _nvme_discover() { > local host_traddr="${3:-$def_host_traddr}" > local trsvcid="${3:-$def_trsvcid}" > > - ARGS=(-t "${trtype}") > + ARGS=(-t "${trtype}" > + --hostnqn="${def_hostnqn}" > + --hostid="${def_hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" = "fc" ]]; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then
On Tue, Jun 20, 2023 at 05:07:43PM +0300, Sagi Grimberg wrote: > > > When the host has enabled the udev/systemd autoconnect services for the > > fc transport it interacts with blktests and make tests break. > > > > nvme-cli learned to ignore connects attemps when using the > > --context command line option paired with a volatile configuration. Thus > > we can mark all the resources created by blktests and avoid any > > interaction with the systemd autoconnect scripts. > > Hmm... is this hapenning with non-fc as well? I haven't seen a problem for TCP or RDMA yet but in principle it should also exists. I can do some tracing to see if we have also problem thern. Two of the udev rule match on the subsystem and the event type. > > +_nvme_cli_supports_context() { > > + if ! nvme connect --help 2>&1 | grep -q context > /dev/null; then > > + return 1 > > + fi > > + return 0 > > +} > > Not a great way to check support. Yeah, agree, it's a bit dodgy. I'll try to figure out a different way. Any suggestions?
On Tue, Jun 20, 2023 at 07:43:35PM +0200, Daniel Wagner wrote: > On Tue, Jun 20, 2023 at 05:07:43PM +0300, Sagi Grimberg wrote: > > Hmm... is this hapenning with non-fc as well? > > I haven't seen a problem for TCP or RDMA yet but in principle it should also > exists. I can do some tracing to see if we have also problem thern. Two of the > udev rule match on the subsystem and the event type. The only udev rule which gets triggered during blktests execution is this one: # nvme-fc transport generated events (old-style for compatibility) ACTION=="change", SUBSYSTEM=="fc", ENV{FC_EVENT}=="nvmediscovery", \ ENV{NVMEFC_HOST_TRADDR}=="*", ENV{NVMEFC_TRADDR}=="*", \ RUN+="@SYSTEMCTL@ --no-block restart nvmf-connect@--device=none\t--transport=fc\t--traddr=$env{NVMEFC_TRADDR}\t--trsvcid=none\t--host-traddr=$env{NVMEFC_HOST_TRADDR}.service" That explain why blktests didn't get disturbed by the autoconnect rule as this rule has a match on the fc subsystem.
On Jun 20, 2023 / 15:27, Daniel Wagner wrote: > When the host has enabled the udev/systemd autoconnect services for the > fc transport it interacts with blktests and make tests break. > > nvme-cli learned to ignore connects attemps when using the > --context command line option paired with a volatile configuration. Thus > we can mark all the resources created by blktests and avoid any > interaction with the systemd autoconnect scripts. This sounds a good idea. Question, is "--context" option of the nvme command mandatory to run nvme test with nvme_trtype=fc? Or is it nice-to-have feature depending on the test system OS? If it is mandatory, it's the better to check in _nvme_requires. > > Signed-off-by: Daniel Wagner <dwagner@suse.de> > --- > tests/nvme/rc | 73 ++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 63 insertions(+), 10 deletions(-) > > diff --git a/tests/nvme/rc b/tests/nvme/rc > index 191f3e2e0c43..00117d314a38 100644 > --- a/tests/nvme/rc > +++ b/tests/nvme/rc > @@ -14,8 +14,8 @@ def_remote_wwnn="0x10001100aa000001" > def_remote_wwpn="0x20001100aa000001" > def_local_wwnn="0x10001100aa000002" > def_local_wwpn="0x20001100aa000002" > -def_hostnqn="$(cat /etc/nvme/hostnqn 2> /dev/null)" > -def_hostid="$(cat /etc/nvme/hostid 2> /dev/null)" > +def_hostnqn="nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8" > +def_hostid="242d4a24-2484-4a80-8234-d0169409c5e8" > nvme_trtype=${nvme_trtype:-"loop"} > nvme_img_size=${nvme_img_size:-"1G"} > nvme_num_iter=${nvme_num_iter:-"1000"} > @@ -161,6 +161,50 @@ _nvme_calc_rand_io_size() { > echo "${io_size_kb}k" > } > > +_nvme_cli_supports_context() { > + if ! nvme connect --help 2>&1 | grep -q context > /dev/null; then > + return 1 > + fi > + return 0 > +} > + > +_setup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + mkdir -p /run/nvme > + cat >> /run/nvme/blktests.json <<-EOF > + [ > + { > + "hostnqn": "${def_hostnqn}", > + "hostid": "${def_hostid}", > + "subsystems": [ > + { > + "application": "blktests", > + "nqn": "blktests-subsystem-1", > + "ports": [ > + { > + "transport": "fc", > + "traddr": "nn-${def_remote_wwnn}:pn-${def_remote_wwpn}", > + "host_traddr": "nn-${def_local_wwnn}:pn-${def_local_wwpn}" > + } > + ] > + } > + ] > + } > + ] > + EOF > +} > + > +_cleanup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + rm -f /run/nvme/blktests.json > +} > + > _nvme_fcloop_add_rport() { > local local_wwnn="$1" > local local_wwpn="$2" > @@ -235,6 +279,8 @@ _cleanup_fcloop() { > _nvme_fcloop_del_lport "${local_wwnn}" "${local_wwpn}" > _nvme_fcloop_del_rport "${local_wwnn}" "${local_wwpn}" \ > "${remote_wwnn}" "${remote_wwpn}" > + > + _cleanup_nvme_cli > } > > _cleanup_nvmet() { > @@ -337,6 +383,8 @@ _setup_nvmet() { > def_host_traddr=$(printf "nn-%s:pn-%s" \ > "${def_local_wwnn}" \ > "${def_local_wwpn}") > + > + _setup_nvme_cli Can we move this _setup_nvme_cli call from _setup_nvmet to _setup_fcloop? _cleanup_nvme_cli is called in _cleanup_fcloop. I think it is a bit cleaner since those setup/cleanup functions are called at at same level. > fi > } > > @@ -436,18 +484,18 @@ _nvme_connect_subsys() { > trtype="$1" > subsysnqn="$2" > > - ARGS=(-t "${trtype}" -n "${subsysnqn}") > + ARGS=(-t "${trtype}" > + -n "${subsysnqn}" > + --hostnqn="${hostnqn}" > + --hostid="${hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" == "fc" ]] ; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then > ARGS+=(-a "${traddr}" -s "${trsvcid}") > fi > - if [[ "${hostnqn}" != "$def_hostnqn" ]]; then > - ARGS+=(--hostnqn="${hostnqn}") > - fi > - if [[ "${hostid}" != "$def_hostid" ]]; then > - ARGS+=(--hostid="${hostid}") > - fi > if [[ -n "${hostkey}" ]]; then > ARGS+=(--dhchap-secret="${hostkey}") > fi > @@ -482,7 +530,12 @@ _nvme_discover() { > local host_traddr="${3:-$def_host_traddr}" > local trsvcid="${3:-$def_trsvcid}" > > - ARGS=(-t "${trtype}") > + ARGS=(-t "${trtype}" > + --hostnqn="${def_hostnqn}" > + --hostid="${def_hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" = "fc" ]]; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then > -- > 2.41.0 >
On Tue, Jun 27, 2023 at 10:22:53AM +0000, Shinichiro Kawasaki wrote: > On Jun 20, 2023 / 15:27, Daniel Wagner wrote: > > When the host has enabled the udev/systemd autoconnect services for the > > fc transport it interacts with blktests and make tests break. > > > > nvme-cli learned to ignore connects attemps when using the > > --context command line option paired with a volatile configuration. Thus > > we can mark all the resources created by blktests and avoid any > > interaction with the systemd autoconnect scripts. > > This sounds a good idea. Question, is "--context" option of the nvme command > mandatory to run nvme test with nvme_trtype=fc? If nvme-cli is called without the '--context' option, the command will be executed. Though if '--context' is provided as option and there is a configuration which matches the connect parameters but doesn't match the context it will ignore the operation. The blktests tests expects that nothing behind it's back is fiddling on the setup while it is running. So far udev didn't trigger for rdma/tcp but with fc it will. Thus, it's mandatory to use either the '--context' parameter or alternatively disable the rule with ln -s /etc/udev/rules.d/70-nvmf-autoconnect.rules /dev/null BTW, when the udev rule is active I observed crashes when running blktests. So there is more to fix, though one thing at the time. > Or is it nice-to-have feature > depending on the test system OS? If it is mandatory, it's the better to check > in _nvme_requires. Well, I didn't want to make this a hard requirement for all tests. I guess we could make it for fc only if this is what you had in mind. The question should it only test for nvme-cli supporting --context or should it be really clever and test if the udev rule is also active (no idea how but I assume it is possible)? > > _cleanup_nvmet() { > > @@ -337,6 +383,8 @@ _setup_nvmet() { > > def_host_traddr=$(printf "nn-%s:pn-%s" \ > > "${def_local_wwnn}" \ > > "${def_local_wwpn}") > > + > > + _setup_nvme_cli > > Can we move this _setup_nvme_cli call from _setup_nvmet to _setup_fcloop? > _cleanup_nvme_cli is called in _cleanup_fcloop. I think it is a bit cleaner > since those setup/cleanup functions are called at at same level. Sure, no problem.
On Jun 28, 2023 / 08:09, Daniel Wagner wrote: > On Tue, Jun 27, 2023 at 10:22:53AM +0000, Shinichiro Kawasaki wrote: > > On Jun 20, 2023 / 15:27, Daniel Wagner wrote: > > > When the host has enabled the udev/systemd autoconnect services for the > > > fc transport it interacts with blktests and make tests break. > > > > > > nvme-cli learned to ignore connects attemps when using the > > > --context command line option paired with a volatile configuration. Thus > > > we can mark all the resources created by blktests and avoid any > > > interaction with the systemd autoconnect scripts. > > > > This sounds a good idea. Question, is "--context" option of the nvme command > > mandatory to run nvme test with nvme_trtype=fc? > > If nvme-cli is called without the '--context' option, the command will be > executed. Though if '--context' is provided as option and there is a > configuration which matches the connect parameters but doesn't match the context > it will ignore the operation. > > The blktests tests expects that nothing behind it's back is fiddling on the > setup while it is running. So far udev didn't trigger for rdma/tcp but with fc > it will. > > Thus, it's mandatory to use either the '--context' parameter or alternatively > disable the rule with > > ln -s /etc/udev/rules.d/70-nvmf-autoconnect.rules /dev/null > > BTW, when the udev rule is active I observed crashes when running blktests. So > there is more to fix, though one thing at the time. > > > Or is it nice-to-have feature > > depending on the test system OS? If it is mandatory, it's the better to check > > in _nvme_requires. > > Well, I didn't want to make this a hard requirement for all tests. I guess we > could make it for fc only if this is what you had in mind. The question should > it only test for nvme-cli supporting --context or should it be really clever and > test if the udev rule is also active (no idea how but I assume it is possible)? Thanks for the explanations. It looks that the requirement check I suggested i _nvme_requires will be will be too much. And I don't have good idea for the udev rule check either, so let's settle this change without the checks.
On Wed, Jun 28, 2023 at 10:04:01AM +0000, Shinichiro Kawasaki wrote: > > BTW, when the udev rule is active I observed crashes when running blktests. So > > there is more to fix, though one thing at the time. FTR, this is typical hanger/crash I see when the udev rule is active: run blktests nvme/005 at 2023-06-28 16:30:07 loop0: detected capacity change from 0 to 32768 nvmet: adding nsid 1 to subsystem blktests-subsystem-1 nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" (NULL device *): {0:0} Association created [43] nvmet: ctrl 1 start keep-alive timer for 5 secs nvmet: creating nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8. [7] nvmet: adding queue 1 to ctrl 1. [363] nvmet: adding queue 2 to ctrl 1. [43] nvmet: adding queue 3 to ctrl 1. [45] nvmet: adding queue 4 to ctrl 1. nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" nvme nvme3: NVME-FC{1}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "nqn.2014-08.org.nvmexpress.discovery" (NULL device *): {0:1} Association created [43] nvmet: ctrl 2 start keep-alive timer for 120 secs nvmet: creating discovery controller 2 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8. nvme nvme3: NVME-FC{1}: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" (NULL device *): {0:2} Association created [24] nvmet: ctrl 3 start keep-alive timer for 5 secs nvmet: creating nvm controller 3 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8. [7] nvmet: adding queue 1 to ctrl 3. [24] nvmet: adding queue 2 to ctrl 3. [43] nvmet: adding queue 3 to ctrl 3. [45] nvmet: adding queue 4 to ctrl 3. nvme nvme2: NVME-FC{0}: controller connect complete nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" block nvme2n1: no available path - failing I/O block nvme2n1: no available path - failing I/O Buffer I/O error on dev nvme2n1, logical block 0, async page read [363] nvmet: ctrl 1 stop keep-alive (NULL device *): {0:0} Association deleted (NULL device *): {0:0} Association freed (NULL device *): Disconnect LS failed: No Association general protection fault, probably for non-canonical address 0xdffffc00000000a4: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000520-0x0000000000000527] CPU: 1 PID: 363 Comm: kworker/1:4 Tainted: G B W 6.4.0-rc2+ #4 451dee9b3635bb59af0c91bc19227b1b3bbbbf3f Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown unknown Workqueue: nvmet-wq fcloop_fcp_recv_work [nvme_fcloop] RIP: 0010:nvmet_execute_disc_get_log_page+0x23f/0x8c0 [nvmet] Code: e8 c6 12 63 e3 4c 89 6c 24 40 48 89 5c 24 08 4c 8b 3b 49 8d 9f 20 05 00 00 48 89 d8 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 48 89 df e8 93 12 63 e3 4c 89 74 24 30 4c 8b 2b RSP: 0018:ffff88811ab378a0 EFLAGS: 00010202 RAX: 00000000000000a4 RBX: 0000000000000520 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffffa95d2d70 RBP: ffff88811ab37ab0 R08: dffffc0000000000 R09: fffffbfff52ba5af R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff11023566f20 R13: ffff888107db3260 R14: ffff888107db3270 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88815a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000002394000 CR3: 000000011e5e8006 CR4: 0000000000370ee0 Call Trace: <TASK> ? prepare_alloc_pages+0x1c5/0x580 ? __cfi_nvmet_execute_disc_get_log_page+0x10/0x10 [nvmet f0c9e080f6cf521f454ee104d12dafd11a5a7613] ? __alloc_pages+0x30e/0x650 ? slab_post_alloc_hook+0x67/0x350 ? __cfi___alloc_pages+0x10/0x10 ? alloc_pages+0x30e/0x530 ? sgl_alloc_order+0x118/0x320 nvmet_fc_queue_fcp_req+0xa19/0xda0 [nvmet_fc f192e753efa6aca39b2b3b72c07e648aac01348b] ? nvmet_fc_rcv_fcp_req+0x9c0/0x9c0 [nvmet_fc f192e753efa6aca39b2b3b72c07e648aac01348b] ? do_raw_spin_unlock+0x116/0x8a0 ? nvmet_fc_rcv_fcp_req+0x4de/0x9c0 [nvmet_fc f192e753efa6aca39b2b3b72c07e648aac01348b] nvmet_fc_rcv_fcp_req+0x4f0/0x9c0 [nvmet_fc f192e753efa6aca39b2b3b72c07e648aac01348b] fcloop_fcp_recv_work+0x173/0x440 [nvme_fcloop dcecfb508887e13f93cf8f37ef42c0fab90cbd00] process_one_work+0x80f/0xfb0 ? rescuer_thread+0x1130/0x1130 ? do_raw_spin_trylock+0xc9/0x1f0 ? lock_acquired+0x4a/0x9a0 ? wq_worker_sleeping+0x1e/0x200 worker_thread+0x91e/0x1260 ? do_raw_spin_unlock+0x116/0x8a0 kthread+0x25d/0x2f0 ? __cfi_worker_thread+0x10/0x10 ? __cfi_kthread+0x10/0x10 ret_from_fork+0x29/0x50 </TASK>
On 20/06/2023 16:27, Daniel Wagner wrote: > When the host has enabled the udev/systemd autoconnect services for the > fc transport it interacts with blktests and make tests break. > > nvme-cli learned to ignore connects attemps when using the > --context command line option paired with a volatile configuration. Thus > we can mark all the resources created by blktests and avoid any > interaction with the systemd autoconnect scripts. why would we like to ignore connect attempts during blktests ? We define unique nqn/ids/etc. So we never should disturb other running services, unless it uses the same parameters, which shouldn't happen. Agree on setting the hard coded values for hostnqn and hostid instead of reading the /etc/nvme/* files. This should do the work IMO, isn't it ? > > Signed-off-by: Daniel Wagner <dwagner@suse.de> > --- > tests/nvme/rc | 73 ++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 63 insertions(+), 10 deletions(-) > > diff --git a/tests/nvme/rc b/tests/nvme/rc > index 191f3e2e0c43..00117d314a38 100644 > --- a/tests/nvme/rc > +++ b/tests/nvme/rc > @@ -14,8 +14,8 @@ def_remote_wwnn="0x10001100aa000001" > def_remote_wwpn="0x20001100aa000001" > def_local_wwnn="0x10001100aa000002" > def_local_wwpn="0x20001100aa000002" > -def_hostnqn="$(cat /etc/nvme/hostnqn 2> /dev/null)" > -def_hostid="$(cat /etc/nvme/hostid 2> /dev/null)" > +def_hostnqn="nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8" > +def_hostid="242d4a24-2484-4a80-8234-d0169409c5e8" > nvme_trtype=${nvme_trtype:-"loop"} > nvme_img_size=${nvme_img_size:-"1G"} > nvme_num_iter=${nvme_num_iter:-"1000"} > @@ -161,6 +161,50 @@ _nvme_calc_rand_io_size() { > echo "${io_size_kb}k" > } > > +_nvme_cli_supports_context() { > + if ! nvme connect --help 2>&1 | grep -q context > /dev/null; then > + return 1 > + fi > + return 0 > +} > + > +_setup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + mkdir -p /run/nvme > + cat >> /run/nvme/blktests.json <<-EOF > + [ > + { > + "hostnqn": "${def_hostnqn}", > + "hostid": "${def_hostid}", > + "subsystems": [ > + { > + "application": "blktests", > + "nqn": "blktests-subsystem-1", > + "ports": [ > + { > + "transport": "fc", > + "traddr": "nn-${def_remote_wwnn}:pn-${def_remote_wwpn}", > + "host_traddr": "nn-${def_local_wwnn}:pn-${def_local_wwpn}" > + } > + ] > + } > + ] > + } > + ] > + EOF > +} > + > +_cleanup_nvme_cli() { > + if ! _nvme_cli_supports_context; then > + return > + fi > + > + rm -f /run/nvme/blktests.json > +} > + > _nvme_fcloop_add_rport() { > local local_wwnn="$1" > local local_wwpn="$2" > @@ -235,6 +279,8 @@ _cleanup_fcloop() { > _nvme_fcloop_del_lport "${local_wwnn}" "${local_wwpn}" > _nvme_fcloop_del_rport "${local_wwnn}" "${local_wwpn}" \ > "${remote_wwnn}" "${remote_wwpn}" > + > + _cleanup_nvme_cli > } > > _cleanup_nvmet() { > @@ -337,6 +383,8 @@ _setup_nvmet() { > def_host_traddr=$(printf "nn-%s:pn-%s" \ > "${def_local_wwnn}" \ > "${def_local_wwpn}") > + > + _setup_nvme_cli > fi > } > > @@ -436,18 +484,18 @@ _nvme_connect_subsys() { > trtype="$1" > subsysnqn="$2" > > - ARGS=(-t "${trtype}" -n "${subsysnqn}") > + ARGS=(-t "${trtype}" > + -n "${subsysnqn}" > + --hostnqn="${hostnqn}" > + --hostid="${hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" == "fc" ]] ; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then > ARGS+=(-a "${traddr}" -s "${trsvcid}") > fi > - if [[ "${hostnqn}" != "$def_hostnqn" ]]; then > - ARGS+=(--hostnqn="${hostnqn}") > - fi > - if [[ "${hostid}" != "$def_hostid" ]]; then > - ARGS+=(--hostid="${hostid}") > - fi > if [[ -n "${hostkey}" ]]; then > ARGS+=(--dhchap-secret="${hostkey}") > fi > @@ -482,7 +530,12 @@ _nvme_discover() { > local host_traddr="${3:-$def_host_traddr}" > local trsvcid="${3:-$def_trsvcid}" > > - ARGS=(-t "${trtype}") > + ARGS=(-t "${trtype}" > + --hostnqn="${def_hostnqn}" > + --hostid="${def_hostid}") > + if _nvme_cli_supports_context; then > + ARGS+=(--context="blktests") > + fi > if [[ "${trtype}" = "fc" ]]; then > ARGS+=(-a "${traddr}" -w "${host_traddr}") > elif [[ "${trtype}" != "loop" ]]; then
On Thu, Jul 06, 2023 at 07:11:58PM +0300, Max Gurtovoy wrote: > > > On 20/06/2023 16:27, Daniel Wagner wrote: > > When the host has enabled the udev/systemd autoconnect services for the > > fc transport it interacts with blktests and make tests break. > > > > nvme-cli learned to ignore connects attemps when using the > > --context command line option paired with a volatile configuration. Thus > > we can mark all the resources created by blktests and avoid any > > interaction with the systemd autoconnect scripts. > > why would we like to ignore connect attempts during blktests ? The problem I observed is that there were two connect attempts (one triggered via udev and one via the test itself) which disturbed the test. The interference resulted into a complete hang of the test case: https://lore.kernel.org/linux-nvme/6g53yj5afhhonwf2psf7ft2gkakkwewy7klkix3y3u6uclpa5w@tt2yfzigyxgg/ > We define unique nqn/ids/etc. So we never should disturb other running > services, unless it uses the same parameters, which shouldn't happen. Yes, that should do the decoupling between udev and blktests. > Agree on setting the hard coded values for hostnqn and hostid instead of > reading the /etc/nvme/* files. This should do the work IMO, isn't it ? Do you mean that nvme-cli will ignore the udev trigger when a different hostnqn is invovled? At least for the well-known discover controller the hostqnq doesn't work. Though not sure if I understood you correctly here.
On 10/07/2023 11:27, Daniel Wagner wrote: > On Thu, Jul 06, 2023 at 07:11:58PM +0300, Max Gurtovoy wrote: >> >> >> On 20/06/2023 16:27, Daniel Wagner wrote: >>> When the host has enabled the udev/systemd autoconnect services for the >>> fc transport it interacts with blktests and make tests break. >>> >>> nvme-cli learned to ignore connects attemps when using the >>> --context command line option paired with a volatile configuration. Thus >>> we can mark all the resources created by blktests and avoid any >>> interaction with the systemd autoconnect scripts. >> >> why would we like to ignore connect attempts during blktests ? > > The problem I observed is that there were two connect attempts (one triggered > via udev and one via the test itself) which disturbed the test. The interference > resulted into a complete hang of the test case: > > https://lore.kernel.org/linux-nvme/6g53yj5afhhonwf2psf7ft2gkakkwewy7klkix3y3u6uclpa5w@tt2yfzigyxgg/ > >> We define unique nqn/ids/etc. So we never should disturb other running >> services, unless it uses the same parameters, which shouldn't happen. > > Yes, that should do the decoupling between udev and blktests. > >> Agree on setting the hard coded values for hostnqn and hostid instead of >> reading the /etc/nvme/* files. This should do the work IMO, isn't it ? > > Do you mean that nvme-cli will ignore the udev trigger when a different hostnqn > is invovled? At least for the well-known discover controller the hostqnq > doesn't work. Though not sure if I understood you correctly here. All I was saying is that your issue would have been fixed easily by 2-3 LOC by removing the usage of /etc/nvme/* files that are usually configured by system administrator and use a default values for blktests hostnqn and hostid. The usage of --context in blktests is redundant IMO.
On Mon, Jul 10, 2023 at 12:53:17PM +0300, Max Gurtovoy wrote: > > Agree on setting the hard coded values for hostnqn and hostid instead of > > > reading the /etc/nvme/* files. This should do the work IMO, isn't it ? > > > > Do you mean that nvme-cli will ignore the udev trigger when a different hostnqn > > is invovled? At least for the well-known discover controller the hostqnq > > doesn't work. Though not sure if I understood you correctly here. > > All I was saying is that your issue would have been fixed easily by 2-3 LOC > by removing the usage of /etc/nvme/* files that are usually configured by > system administrator and use a default values for blktests hostnqn and > hostid. Okay. > The usage of --context in blktests is redundant IMO. Right, I made a bit of a mess with the commit message. Sorry about that.
On 10/07/2023 13:19, Daniel Wagner wrote: > On Mon, Jul 10, 2023 at 12:53:17PM +0300, Max Gurtovoy wrote: > > > Agree on setting the hard coded values for hostnqn and hostid instead of >>>> reading the /etc/nvme/* files. This should do the work IMO, isn't it ? >>> >>> Do you mean that nvme-cli will ignore the udev trigger when a different hostnqn >>> is invovled? At least for the well-known discover controller the hostqnq >>> doesn't work. Though not sure if I understood you correctly here. >> >> All I was saying is that your issue would have been fixed easily by 2-3 LOC >> by removing the usage of /etc/nvme/* files that are usually configured by >> system administrator and use a default values for blktests hostnqn and >> hostid. > > Okay. > >> The usage of --context in blktests is redundant IMO. > > Right, I made a bit of a mess with the commit message. Sorry about that. I think it is more than just commit message. A lot of code that we can avoid was added regarding the --context cmdline argument. Maybe it's worth cleaning it..
On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: > I think it is more than just commit message. Okay, starting to understand what's the problem. > A lot of code that we can avoid was added regarding the --context cmdline > argument. Correct and it's not optional to get the tests passing for the fc transport. > Maybe it's worth cleaning it.. It really solves the problem that the autoconnect setup of nvme-cli is distrubing the tests (*). The only other way I found to stop the autoconnect is by disabling the udev rule completely. If autoconnect isn't enabled the context isn't necessary. Though changing system configuration from blktests seems at bit excessive. Another option is to detect if autoconect is enabled and report this when starting the tests. In this case we could remove the context part completely. Obviously, I would prefer to keep it but I am certaintaly not against dropping it and make blktests a bit simpler if this is the preference. I just need to remember to disable the autoconnect stuff when using blktests. (*) Sure we can fix this but at this point. Though I think it's a bit strange for a test suite to depend/interact with external components in this way.
On 10/07/2023 18:03, Daniel Wagner wrote: > On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: >> I think it is more than just commit message. > > Okay, starting to understand what's the problem. > >> A lot of code that we can avoid was added regarding the --context cmdline >> argument. > > Correct and it's not optional to get the tests passing for the fc transport. why the fc needs the --context to pass tests ? > >> Maybe it's worth cleaning it.. > > It really solves the problem that the autoconnect setup of nvme-cli is > distrubing the tests (*). The only other way I found to stop the autoconnect is > by disabling the udev rule completely. If autoconnect isn't enabled the context > isn't necessary. Though changing system configuration from blktests seems at bit > excessive. we should not stop any autoconnect during blktests. The autoconnect and all the system admin services should run normally. The blktests should not interfere with regular system configuration and should not use any sysadmin file/services/devices. It should create its own subsystems, nqn's, id's, namespaces, etc.. > > Another option is to detect if autoconect is enabled and report this when > starting the tests. In this case we could remove the context part completely. the --context might be used in the some sysadmin scripts or so. I don't see a point to do so in the blktests since we create a new association between initiator and target on each test (or on each group of tests). > Obviously, I would prefer to keep it but I am certaintaly not against dropping > it and make blktests a bit simpler if this is the preference. I just need to > remember to disable the autoconnect stuff when using blktests. > > (*) Sure we can fix this but at this point. Though I think it's a bit strange > for a test suite to depend/interact with external components in this way.
On Mon, Jul 10, 2023 at 07:30:20PM +0300, Max Gurtovoy wrote: > > > On 10/07/2023 18:03, Daniel Wagner wrote: > > On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: > > > I think it is more than just commit message. > > > > Okay, starting to understand what's the problem. > > > > > A lot of code that we can avoid was added regarding the --context cmdline > > > argument. > > > > Correct and it's not optional to get the tests passing for the fc transport. > > why the fc needs the --context to pass tests ? A typical nvme test consists out of following steps (nvme/004): // nvme target setup (1) _create_nvmet_subsystem "blktests-subsystem-1" "${loop_dev}" \ "91fdba0d-f87b-4c25-b80f-db7be1418b9e" _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-1" // nvme host setup (2) _nvme_connect_subsys "${nvme_trtype}" blktests-subsystem-1 local nvmedev nvmedev=$(_find_nvme_dev "blktests-subsystem-1") cat "/sys/block/${nvmedev}n1/uuid" cat "/sys/block/${nvmedev}n1/wwid" // nvme host teardown (3) _nvme_disconnect_subsys blktests-subsystem-1 // nvme target teardown (4) _remove_nvmet_subsystem_from_port "${port}" "blktests-subsystem-1" _remove_nvmet_subsystem "blktests-subsystem-1" The corresponding output with --context run blktests nvme/004 at 2023-07-12 13:49:50 // (1) loop0: detected capacity change from 0 to 32768 nvmet: adding nsid 1 to subsystem blktests-subsystem-1 nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" (NULL device *): {0:0} Association created [174] nvmet: ctrl 1 start keep-alive timer for 5 secs // (2) nvmet: creating nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [374] nvmet: adding queue 1 to ctrl 1. [1138] nvmet: adding queue 2 to ctrl 1. [73] nvmet: adding queue 3 to ctrl 1. [174] nvmet: adding queue 4 to ctrl 1. nvme nvme2: NVME-FC{0}: controller connect complete nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" // (3) nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" // (4) [1138] nvmet: ctrl 1 stop keep-alive (NULL device *): {0:0} Association deleted (NULL device *): {0:0} Association freed (NULL device *): Disconnect LS failed: No Association and without --context run blktests nvme/004 at 2023-07-12 13:50:33 // (1) loop1: detected capacity change from 0 to 32768 nvmet: adding nsid 1 to subsystem blktests-subsystem-1 nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "nqn.2014-08.org.nvmexpress.discovery" (NULL device *): {0:0} Association created [1235] nvmet: ctrl 1 start keep-alive timer for 120 secs // XXX udev auto connect nvmet: creating discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8. nvme nvme2: NVME-FC{0}: controller connect complete nvme nvme2: NVME-FC{0}: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" nvme nvme3: NVME-FC{1}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" (NULL device *): {0:1} Association created [73] nvmet: ctrl 2 start keep-alive timer for 5 secs // (2) nvmet: creating nvm controller 2 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. [374] nvmet: adding queue 1 to ctrl 2. [233] nvmet: adding queue 2 to ctrl 2. [73] nvmet: adding queue 3 to ctrl 2. [1235] nvmet: adding queue 4 to ctrl 2. nvme nvme3: NVME-FC{1}: controller connect complete nvme nvme3: NVME-FC{1}: new ctrl: NQN "blktests-subsystem-1" // (3) nvme nvme3: Removing ctrl: NQN "blktests-subsystem-1" general protection fault, probably for non-canonical address 0xdffffc00000000a4: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000520-0x0000000000000527] CPU: 1 PID: 2076 Comm: kworker/1:1 Tainted: G W 6.4.0-rc2+ #7 f2a41a58e59b44ee1bb7bc68087ccbe6d76392dd Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown unknown Workqueue: nvmet-wq fcloop_fcp_recv_work [nvme_fcloop] RIP: 0010:nvmet_execute_disc_get_log_page+0x23f/0x8c0 [nvmet] Code: e8 c6 12 c7 e0 4c 89 6c 24 40 48 89 5c 24 08 4c 8b 3b 49 8d 9f 20 05 00 00 48 89 d8 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 48 89 df e8 93 12 c7 e0 4c 89 74 24 30 4c 8b 2b RSP: 0018:ffff888139a778a0 EFLAGS: 00010202 RAX: 00000000000000a4 RBX: 0000000000000520 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffffa8af3a88 RBP: ffff888139a77ab0 R08: dffffc0000000000 R09: fffffbfff515e752 R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff1102734ef20 R13: ffff888105563260 R14: ffff888105563270 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88815a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000824220 CR3: 0000000106512005 CR4: 0000000000370ee0 Call Trace: <TASK> ? prepare_alloc_pages+0x1c5/0x580 ? __cfi_nvmet_execute_disc_get_log_page+0x10/0x10 [nvmet 1da13efcd047161c3381cb240a95399f951fd98f] ? __alloc_pages+0x30e/0x650 ? slab_post_alloc_hook+0x67/0x350 ? __cfi___alloc_pages+0x10/0x10 ? alloc_pages+0x30e/0x530 ? sgl_alloc_order+0x118/0x320 nvmet_fc_queue_fcp_req+0xa19/0xda0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] ? nvmet_fc_rcv_fcp_req+0x9c0/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] ? lockdep_hardirqs_on_prepare+0x2aa/0x5e0 ? nvmet_fc_rcv_fcp_req+0x4de/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] nvmet_fc_rcv_fcp_req+0x4f0/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] fcloop_fcp_recv_work+0x173/0x440 [nvme_fcloop 05cf1144b564c4e1626f9f15422ccf61f2af41de] process_one_work+0x80f/0xfb0 ? rescuer_thread+0x1130/0x1130 ? do_raw_spin_trylock+0xc9/0x1f0 ? lock_acquired+0x310/0x9a0 ? worker_thread+0xd5e/0x1260 worker_thread+0x91e/0x1260 ? __cfi_lock_release+0x10/0x10 ? do_raw_spin_unlock+0x116/0x8a0 kthread+0x25d/0x2f0 ? __cfi_worker_thread+0x10/0x10 ? __cfi_kthread+0x10/0x10 ret_from_fork+0x29/0x50 </TASK> > > > Maybe it's worth cleaning it.. > > > > It really solves the problem that the autoconnect setup of nvme-cli is > > distrubing the tests (*). The only other way I found to stop the autoconnect is > > by disabling the udev rule completely. If autoconnect isn't enabled the context > > isn't necessary. Though changing system configuration from blktests seems at bit > > excessive. > > we should not stop any autoconnect during blktests. The autoconnect and all > the system admin services should run normally. I do not agree here. The current blktests are not designed for run as intergration tests. Sure we should also tests this but currently blktests is just not there and tcp/rdma are not actually covered anyway. Thanks, Daniel
On 12/07/2023 15:04, Daniel Wagner wrote: > On Mon, Jul 10, 2023 at 07:30:20PM +0300, Max Gurtovoy wrote: >> >> >> On 10/07/2023 18:03, Daniel Wagner wrote: >>> On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: >>>> I think it is more than just commit message. >>> >>> Okay, starting to understand what's the problem. >>> >>>> A lot of code that we can avoid was added regarding the --context cmdline >>>> argument. >>> >>> Correct and it's not optional to get the tests passing for the fc transport. >> >> why the fc needs the --context to pass tests ? > > A typical nvme test consists out of following steps (nvme/004): > > // nvme target setup (1) > _create_nvmet_subsystem "blktests-subsystem-1" "${loop_dev}" \ > "91fdba0d-f87b-4c25-b80f-db7be1418b9e" > _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-1" > > // nvme host setup (2) > _nvme_connect_subsys "${nvme_trtype}" blktests-subsystem-1 > > local nvmedev > nvmedev=$(_find_nvme_dev "blktests-subsystem-1") > cat "/sys/block/${nvmedev}n1/uuid" > cat "/sys/block/${nvmedev}n1/wwid" > > // nvme host teardown (3) > _nvme_disconnect_subsys blktests-subsystem-1 > > // nvme target teardown (4) > _remove_nvmet_subsystem_from_port "${port}" "blktests-subsystem-1" > _remove_nvmet_subsystem "blktests-subsystem-1" > > > The corresponding output with --context > > run blktests nvme/004 at 2023-07-12 13:49:50 > // (1) > loop0: detected capacity change from 0 to 32768 > nvmet: adding nsid 1 to subsystem blktests-subsystem-1 > nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" > (NULL device *): {0:0} Association created > [174] nvmet: ctrl 1 start keep-alive timer for 5 secs > // (2) > nvmet: creating nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. > [374] nvmet: adding queue 1 to ctrl 1. > [1138] nvmet: adding queue 2 to ctrl 1. > [73] nvmet: adding queue 3 to ctrl 1. > [174] nvmet: adding queue 4 to ctrl 1. > nvme nvme2: NVME-FC{0}: controller connect complete > nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" > // (3) > nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" > // (4) > [1138] nvmet: ctrl 1 stop keep-alive > (NULL device *): {0:0} Association deleted > (NULL device *): {0:0} Association freed > (NULL device *): Disconnect LS failed: No Association > > > and without --context > > run blktests nvme/004 at 2023-07-12 13:50:33 > // (1) > loop1: detected capacity change from 0 to 32768 > nvmet: adding nsid 1 to subsystem blktests-subsystem-1 > nvme nvme2: NVME-FC{0}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "nqn.2014-08.org.nvmexpress.discovery" why does this association to discovery controller created ? because of some system service ? can we configure the blktests subsystem not to be discovered or add some access list to it ? > (NULL device *): {0:0} Association created > [1235] nvmet: ctrl 1 start keep-alive timer for 120 secs > // XXX udev auto connect > nvmet: creating discovery controller 1 for subsystem nqn.2014-08.org.nvmexpress.discovery for NQN nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8. > nvme nvme2: NVME-FC{0}: controller connect complete > nvme nvme2: NVME-FC{0}: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" > nvme nvme3: NVME-FC{1}: create association : host wwpn 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN "blktests-subsystem-1" > (NULL device *): {0:1} Association created > [73] nvmet: ctrl 2 start keep-alive timer for 5 secs > // (2) > nvmet: creating nvm controller 2 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. > [374] nvmet: adding queue 1 to ctrl 2. > [233] nvmet: adding queue 2 to ctrl 2. > [73] nvmet: adding queue 3 to ctrl 2. > [1235] nvmet: adding queue 4 to ctrl 2. > nvme nvme3: NVME-FC{1}: controller connect complete > nvme nvme3: NVME-FC{1}: new ctrl: NQN "blktests-subsystem-1" > // (3) > nvme nvme3: Removing ctrl: NQN "blktests-subsystem-1" bellow sounds like a bug we should fix :) > general protection fault, probably for non-canonical address 0xdffffc00000000a4: 0000 [#1] PREEMPT SMP KASAN NOPTI > KASAN: null-ptr-deref in range [0x0000000000000520-0x0000000000000527] > CPU: 1 PID: 2076 Comm: kworker/1:1 Tainted: G W 6.4.0-rc2+ #7 f2a41a58e59b44ee1bb7bc68087ccbe6d76392dd > Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown unknown > Workqueue: nvmet-wq fcloop_fcp_recv_work [nvme_fcloop] > RIP: 0010:nvmet_execute_disc_get_log_page+0x23f/0x8c0 [nvmet] > Code: e8 c6 12 c7 e0 4c 89 6c 24 40 48 89 5c 24 08 4c 8b 3b 49 8d 9f 20 05 00 00 48 89 d8 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 48 89 df e8 93 12 c7 e0 4c 89 74 24 30 4c 8b 2b > RSP: 0018:ffff888139a778a0 EFLAGS: 00010202 > RAX: 00000000000000a4 RBX: 0000000000000520 RCX: dffffc0000000000 > RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffffffffa8af3a88 > RBP: ffff888139a77ab0 R08: dffffc0000000000 R09: fffffbfff515e752 > R10: 0000000000000000 R11: dffffc0000000001 R12: 1ffff1102734ef20 > R13: ffff888105563260 R14: ffff888105563270 R15: 0000000000000000 > FS: 0000000000000000(0000) GS:ffff88815a600000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000000824220 CR3: 0000000106512005 CR4: 0000000000370ee0 > Call Trace: > <TASK> > ? prepare_alloc_pages+0x1c5/0x580 > ? __cfi_nvmet_execute_disc_get_log_page+0x10/0x10 [nvmet 1da13efcd047161c3381cb240a95399f951fd98f] > ? __alloc_pages+0x30e/0x650 > ? slab_post_alloc_hook+0x67/0x350 > ? __cfi___alloc_pages+0x10/0x10 > ? alloc_pages+0x30e/0x530 > ? sgl_alloc_order+0x118/0x320 > nvmet_fc_queue_fcp_req+0xa19/0xda0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] > ? nvmet_fc_rcv_fcp_req+0x9c0/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] > ? lockdep_hardirqs_on_prepare+0x2aa/0x5e0 > ? nvmet_fc_rcv_fcp_req+0x4de/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] > nvmet_fc_rcv_fcp_req+0x4f0/0x9c0 [nvmet_fc 11628cdb09a094fd591bfaf88be45b97e3b18e3a] > fcloop_fcp_recv_work+0x173/0x440 [nvme_fcloop 05cf1144b564c4e1626f9f15422ccf61f2af41de] > process_one_work+0x80f/0xfb0 > ? rescuer_thread+0x1130/0x1130 > ? do_raw_spin_trylock+0xc9/0x1f0 > ? lock_acquired+0x310/0x9a0 > ? worker_thread+0xd5e/0x1260 > worker_thread+0x91e/0x1260 > ? __cfi_lock_release+0x10/0x10 > ? do_raw_spin_unlock+0x116/0x8a0 > kthread+0x25d/0x2f0 > ? __cfi_worker_thread+0x10/0x10 > ? __cfi_kthread+0x10/0x10 > ret_from_fork+0x29/0x50 > </TASK> > >>>> Maybe it's worth cleaning it.. >>> >>> It really solves the problem that the autoconnect setup of nvme-cli is >>> distrubing the tests (*). The only other way I found to stop the autoconnect is >>> by disabling the udev rule completely. If autoconnect isn't enabled the context >>> isn't necessary. Though changing system configuration from blktests seems at bit >>> excessive. >> >> we should not stop any autoconnect during blktests. The autoconnect and all >> the system admin services should run normally. > > I do not agree here. The current blktests are not designed for run as > intergration tests. Sure we should also tests this but currently blktests is > just not there and tcp/rdma are not actually covered anyway. what do you mean tcp/rdma not covered ? And maybe we should make several changes in the blktests to make it standalone without interfering the existing configuration make by some system administrator. > > Thanks, > Daniel
On 7/13/23 02:12, Max Gurtovoy wrote: > > > On 12/07/2023 15:04, Daniel Wagner wrote: >> On Mon, Jul 10, 2023 at 07:30:20PM +0300, Max Gurtovoy wrote: >>> >>> >>> On 10/07/2023 18:03, Daniel Wagner wrote: >>>> On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: >>>>> I think it is more than just commit message. >>>> >>>> Okay, starting to understand what's the problem. >>>> >>>>> A lot of code that we can avoid was added regarding the --context >>>>> cmdline >>>>> argument. >>>> >>>> Correct and it's not optional to get the tests passing for the fc >>>> transport. >>> >>> why the fc needs the --context to pass tests ? >> >> A typical nvme test consists out of following steps (nvme/004): >> >> // nvme target setup (1) >> _create_nvmet_subsystem "blktests-subsystem-1" "${loop_dev}" \ >> "91fdba0d-f87b-4c25-b80f-db7be1418b9e" >> _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-1" >> >> // nvme host setup (2) >> _nvme_connect_subsys "${nvme_trtype}" blktests-subsystem-1 >> >> local nvmedev >> nvmedev=$(_find_nvme_dev "blktests-subsystem-1") >> cat "/sys/block/${nvmedev}n1/uuid" >> cat "/sys/block/${nvmedev}n1/wwid" >> >> // nvme host teardown (3) >> _nvme_disconnect_subsys blktests-subsystem-1 >> >> // nvme target teardown (4) >> _remove_nvmet_subsystem_from_port "${port}" "blktests-subsystem-1" >> _remove_nvmet_subsystem "blktests-subsystem-1" >> >> >> The corresponding output with --context >> >> run blktests nvme/004 at 2023-07-12 13:49:50 >> // (1) >> loop0: detected capacity change from 0 to 32768 >> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >> nvme nvme2: NVME-FC{0}: create association : host wwpn >> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >> "blktests-subsystem-1" >> (NULL device *): {0:0} Association created >> [174] nvmet: ctrl 1 start keep-alive timer for 5 secs >> // (2) >> nvmet: creating nvm controller 1 for subsystem blktests-subsystem-1 >> for NQN >> nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. >> [374] nvmet: adding queue 1 to ctrl 1. >> [1138] nvmet: adding queue 2 to ctrl 1. >> [73] nvmet: adding queue 3 to ctrl 1. >> [174] nvmet: adding queue 4 to ctrl 1. >> nvme nvme2: NVME-FC{0}: controller connect complete >> nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" >> // (3) >> nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" >> // (4) >> [1138] nvmet: ctrl 1 stop keep-alive >> (NULL device *): {0:0} Association deleted >> (NULL device *): {0:0} Association freed >> (NULL device *): Disconnect LS failed: No Association >> >> >> and without --context >> >> run blktests nvme/004 at 2023-07-12 13:50:33 >> // (1) >> loop1: detected capacity change from 0 to 32768 >> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >> nvme nvme2: NVME-FC{0}: create association : host wwpn >> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >> "nqn.2014-08.org.nvmexpress.discovery" > > why does this association to discovery controller created ? because of > some system service ? > Yes. There are nvme-autoconnect udev rules and systemd services installed per default (in quite some systems now). And it's really hard (if not impossible) to disable these services (as we cannot be sure how they are named, hence we wouldn't know which service to disable. > can we configure the blktests subsystem not to be discovered or add some > access list to it ? > But that's precisely what the '--context' thing is attempting to do ... [ .. ] >>>> >>>> It really solves the problem that the autoconnect setup of nvme-cli is >>>> distrubing the tests (*). The only other way I found to stop the >>>> autoconnect is by disabling the udev rule completely. If autoconnect >>>> isn't enabled the context isn't necessary. >>>> Though changing system configuration from blktests seems at bit >>>> excessive. >>> >>> we should not stop any autoconnect during blktests. The autoconnect >>> and all the system admin services should run normally. >> >> I do not agree here. The current blktests are not designed for run as >> intergration tests. Sure we should also tests this but currently >> blktests is just not there and tcp/rdma are not actually covered anyway. > > what do you mean tcp/rdma not covered ? > Because there is no autoconnect functionality for tcp/rdma. For FC we have full topology information, and the driver can emit udev messages whenever a NVMe port appears in the fabrics (and the systemd machinery will then start autoconnect). For TCP/RDMA we do not have this, so really there's nothing which could send udev events (discounting things like mDNS and nvme-stas for now). > And maybe we should make several changes in the blktests to make it > standalone without interfering the existing configuration make by some > system administrator. > ?? But this is what we are trying with this patches. The '--context' flag only needs to be set for the blktests, to inform the rest of the system that these subsystems/configuration is special and should be exempted from 'normal' system processing. Cheers, Hannes
On 13/07/2023 9:00, Hannes Reinecke wrote: > On 7/13/23 02:12, Max Gurtovoy wrote: >> >> >> On 12/07/2023 15:04, Daniel Wagner wrote: >>> On Mon, Jul 10, 2023 at 07:30:20PM +0300, Max Gurtovoy wrote: >>>> >>>> >>>> On 10/07/2023 18:03, Daniel Wagner wrote: >>>>> On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: >>>>>> I think it is more than just commit message. >>>>> >>>>> Okay, starting to understand what's the problem. >>>>> >>>>>> A lot of code that we can avoid was added regarding the --context >>>>>> cmdline >>>>>> argument. >>>>> >>>>> Correct and it's not optional to get the tests passing for the fc >>>>> transport. >>>> >>>> why the fc needs the --context to pass tests ? >>> >>> A typical nvme test consists out of following steps (nvme/004): >>> >>> // nvme target setup (1) >>> _create_nvmet_subsystem "blktests-subsystem-1" "${loop_dev}" \ >>> "91fdba0d-f87b-4c25-b80f-db7be1418b9e" >>> _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-1" >>> >>> // nvme host setup (2) >>> _nvme_connect_subsys "${nvme_trtype}" blktests-subsystem-1 >>> >>> local nvmedev >>> nvmedev=$(_find_nvme_dev "blktests-subsystem-1") >>> cat "/sys/block/${nvmedev}n1/uuid" >>> cat "/sys/block/${nvmedev}n1/wwid" >>> >>> // nvme host teardown (3) >>> _nvme_disconnect_subsys blktests-subsystem-1 >>> >>> // nvme target teardown (4) >>> _remove_nvmet_subsystem_from_port "${port}" "blktests-subsystem-1" >>> _remove_nvmet_subsystem "blktests-subsystem-1" >>> >>> >>> The corresponding output with --context >>> >>> run blktests nvme/004 at 2023-07-12 13:49:50 >>> // (1) >>> loop0: detected capacity change from 0 to 32768 >>> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >>> nvme nvme2: NVME-FC{0}: create association : host wwpn >>> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >>> "blktests-subsystem-1" >>> (NULL device *): {0:0} Association created >>> [174] nvmet: ctrl 1 start keep-alive timer for 5 secs >>> // (2) >>> nvmet: creating nvm controller 1 for subsystem blktests-subsystem-1 >>> for NQN >>> nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. >>> [374] nvmet: adding queue 1 to ctrl 1. >>> [1138] nvmet: adding queue 2 to ctrl 1. >>> [73] nvmet: adding queue 3 to ctrl 1. >>> [174] nvmet: adding queue 4 to ctrl 1. >>> nvme nvme2: NVME-FC{0}: controller connect complete >>> nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" >>> // (3) >>> nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" >>> // (4) >>> [1138] nvmet: ctrl 1 stop keep-alive >>> (NULL device *): {0:0} Association deleted >>> (NULL device *): {0:0} Association freed >>> (NULL device *): Disconnect LS failed: No Association >>> >>> >>> and without --context >>> >>> run blktests nvme/004 at 2023-07-12 13:50:33 >>> // (1) >>> loop1: detected capacity change from 0 to 32768 >>> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >>> nvme nvme2: NVME-FC{0}: create association : host wwpn >>> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >>> "nqn.2014-08.org.nvmexpress.discovery" >> >> why does this association to discovery controller created ? because of >> some system service ? >> > Yes. There are nvme-autoconnect udev rules and systemd services > installed per default (in quite some systems now). > And it's really hard (if not impossible) to disable these services (as > we cannot be sure how they are named, hence we wouldn't know which > service to disable. Right. We shouldn't disable them IMO. > >> can we configure the blktests subsystem not to be discovered or add >> some access list to it ? >> > But that's precisely what the '--context' thing is attempting to do ... I'm not sure it is. Exposing the subsystem is from the target configuration side. Additionally, the --context (which is in the initiator/host side), according to Daniel, is there to distinguish between different invocations. I proposed that blktests subsystem will not be part of discoverable fabric or protected somehow by access list. Therefore, no additional invocation will happen. > > [ .. ] >>>>> >>>>> It really solves the problem that the autoconnect setup of nvme-cli is >>>>> distrubing the tests (*). The only other way I found to stop the >>>>> autoconnect is by disabling the udev rule completely. If >>>>> autoconnect isn't enabled the context isn't necessary. >>>>> Though changing system configuration from blktests seems at bit >>>>> excessive. >>>> >>>> we should not stop any autoconnect during blktests. The autoconnect >>>> and all the system admin services should run normally. >>> >>> I do not agree here. The current blktests are not designed for run as >>> intergration tests. Sure we should also tests this but currently >>> blktests is just not there and tcp/rdma are not actually covered anyway. >> >> what do you mean tcp/rdma not covered ? >> > Because there is no autoconnect functionality for tcp/rdma. > For FC we have full topology information, and the driver can emit udev > messages whenever a NVMe port appears in the fabrics (and the systemd > machinery will then start autoconnect). > For TCP/RDMA we do not have this, so really there's nothing which could > send udev events (discounting things like mDNS and nvme-stas for now). > >> And maybe we should make several changes in the blktests to make it >> standalone without interfering the existing configuration make by some >> system administrator. >> > ?? > But this is what we are trying with this patches. > The '--context' flag only needs to be set for the blktests, to inform > the rest of the system that these subsystems/configuration is special > and should be exempted from 'normal' system processing. The --context is initiator configuration. I'm referring to changes in the target configuration. This will guarantee that things will work also in the environment where we have nvme-cli without the --context flag. > > Cheers, > > Hannes
On 7/13/23 10:49, Max Gurtovoy wrote: > > > On 13/07/2023 9:00, Hannes Reinecke wrote: >> On 7/13/23 02:12, Max Gurtovoy wrote: >>> >>> >>> On 12/07/2023 15:04, Daniel Wagner wrote: >>>> On Mon, Jul 10, 2023 at 07:30:20PM +0300, Max Gurtovoy wrote: >>>>> >>>>> >>>>> On 10/07/2023 18:03, Daniel Wagner wrote: >>>>>> On Mon, Jul 10, 2023 at 03:31:23PM +0300, Max Gurtovoy wrote: >>>>>>> I think it is more than just commit message. >>>>>> >>>>>> Okay, starting to understand what's the problem. >>>>>> >>>>>>> A lot of code that we can avoid was added regarding the --context >>>>>>> cmdline >>>>>>> argument. >>>>>> >>>>>> Correct and it's not optional to get the tests passing for the fc >>>>>> transport. >>>>> >>>>> why the fc needs the --context to pass tests ? >>>> >>>> A typical nvme test consists out of following steps (nvme/004): >>>> >>>> // nvme target setup (1) >>>> _create_nvmet_subsystem "blktests-subsystem-1" "${loop_dev}" \ >>>> "91fdba0d-f87b-4c25-b80f-db7be1418b9e" >>>> _add_nvmet_subsys_to_port "${port}" "blktests-subsystem-1" >>>> >>>> // nvme host setup (2) >>>> _nvme_connect_subsys "${nvme_trtype}" blktests-subsystem-1 >>>> >>>> local nvmedev >>>> nvmedev=$(_find_nvme_dev "blktests-subsystem-1") >>>> cat "/sys/block/${nvmedev}n1/uuid" >>>> cat "/sys/block/${nvmedev}n1/wwid" >>>> >>>> // nvme host teardown (3) >>>> _nvme_disconnect_subsys blktests-subsystem-1 >>>> >>>> // nvme target teardown (4) >>>> _remove_nvmet_subsystem_from_port "${port}" "blktests-subsystem-1" >>>> _remove_nvmet_subsystem "blktests-subsystem-1" >>>> >>>> >>>> The corresponding output with --context >>>> >>>> run blktests nvme/004 at 2023-07-12 13:49:50 >>>> // (1) >>>> loop0: detected capacity change from 0 to 32768 >>>> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >>>> nvme nvme2: NVME-FC{0}: create association : host wwpn >>>> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >>>> "blktests-subsystem-1" >>>> (NULL device *): {0:0} Association created >>>> [174] nvmet: ctrl 1 start keep-alive timer for 5 secs >>>> // (2) >>>> nvmet: creating nvm controller 1 for subsystem >>>> blktests-subsystem-1 for NQN >>>> nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349. >>>> [374] nvmet: adding queue 1 to ctrl 1. >>>> [1138] nvmet: adding queue 2 to ctrl 1. >>>> [73] nvmet: adding queue 3 to ctrl 1. >>>> [174] nvmet: adding queue 4 to ctrl 1. >>>> nvme nvme2: NVME-FC{0}: controller connect complete >>>> nvme nvme2: NVME-FC{0}: new ctrl: NQN "blktests-subsystem-1" >>>> // (3) >>>> nvme nvme2: Removing ctrl: NQN "blktests-subsystem-1" >>>> // (4) >>>> [1138] nvmet: ctrl 1 stop keep-alive >>>> (NULL device *): {0:0} Association deleted >>>> (NULL device *): {0:0} Association freed >>>> (NULL device *): Disconnect LS failed: No Association >>>> >>>> >>>> and without --context >>>> >>>> run blktests nvme/004 at 2023-07-12 13:50:33 >>>> // (1) >>>> loop1: detected capacity change from 0 to 32768 >>>> nvmet: adding nsid 1 to subsystem blktests-subsystem-1 >>>> nvme nvme2: NVME-FC{0}: create association : host wwpn >>>> 0x20001100aa000002 rport wwpn 0x20001100aa000001: NQN >>>> "nqn.2014-08.org.nvmexpress.discovery" >>> >>> why does this association to discovery controller created ? because >>> of some system service ? >>> >> Yes. There are nvme-autoconnect udev rules and systemd services >> installed per default (in quite some systems now). >> And it's really hard (if not impossible) to disable these services (as >> we cannot be sure how they are named, hence we wouldn't know which >> service to disable. > > Right. We shouldn't disable them IMO. > >> >>> can we configure the blktests subsystem not to be discovered or add >>> some access list to it ? >>> >> But that's precisely what the '--context' thing is attempting to do ... > > I'm not sure it is. > > Exposing the subsystem is from the target configuration side. > Additionally, the --context (which is in the initiator/host side), > according to Daniel, is there to distinguish between different > invocations. I proposed that blktests subsystem will not be part of > discoverable fabric or protected somehow by access list. Therefore, no > additional invocation will happen. > Hmm. Maybe we can tweak blktest to use it's own HostNQN, and always pass that for any nvme-cli call. Daniel? Cheers, Hannes
On Thu, Jul 13, 2023 at 12:14:14PM +0200, Hannes Reinecke wrote: > > Exposing the subsystem is from the target configuration side. > > Additionally, the --context (which is in the initiator/host side), > > according to Daniel, is there to distinguish between different > > invocations. I proposed that blktests subsystem will not be part of > > discoverable fabric or protected somehow by access list. Therefore, no > > additional invocation will happen. I am confused. This is exactly what the whole --context thing is. > Hmm. Maybe we can tweak blktest to use it's own HostNQN, and always pass > that for any nvme-cli call. This is what the current code already does.
On Thu, Jul 13, 2023 at 12:30:45PM +0200, Daniel Wagner wrote: > On Thu, Jul 13, 2023 at 12:14:14PM +0200, Hannes Reinecke wrote: > > > Exposing the subsystem is from the target configuration side. > > > Additionally, the --context (which is in the initiator/host side), > > > according to Daniel, is there to distinguish between different > > > invocations. I proposed that blktests subsystem will not be part of > > > discoverable fabric or protected somehow by access list. Therefore, no > > > additional invocation will happen. > > I am confused. This is exactly what the whole --context thing is. Ah I think I got it now. You want me to set allow_hosts on the target side too :)
On 13/07/2023 13:44, Daniel Wagner wrote: > On Thu, Jul 13, 2023 at 12:30:45PM +0200, Daniel Wagner wrote: >> On Thu, Jul 13, 2023 at 12:14:14PM +0200, Hannes Reinecke wrote: >>>> Exposing the subsystem is from the target configuration side. >>>> Additionally, the --context (which is in the initiator/host side), >>>> according to Daniel, is there to distinguish between different >>>> invocations. I proposed that blktests subsystem will not be part of >>>> discoverable fabric or protected somehow by access list. Therefore, no >>>> additional invocation will happen. >> >> I am confused. This is exactly what the whole --context thing is. > > Ah I think I got it now. You want me to set allow_hosts on the target side too > :) Yes. With the unique hostId/hostNqn for blktests this should work without the need for --context mechanism, that was probably added for system administrators and not for unit_testing/QA/Verification engineers that run blktests.
diff --git a/tests/nvme/rc b/tests/nvme/rc index 191f3e2e0c43..00117d314a38 100644 --- a/tests/nvme/rc +++ b/tests/nvme/rc @@ -14,8 +14,8 @@ def_remote_wwnn="0x10001100aa000001" def_remote_wwpn="0x20001100aa000001" def_local_wwnn="0x10001100aa000002" def_local_wwpn="0x20001100aa000002" -def_hostnqn="$(cat /etc/nvme/hostnqn 2> /dev/null)" -def_hostid="$(cat /etc/nvme/hostid 2> /dev/null)" +def_hostnqn="nqn.2014-08.org.nvmexpress:uuid:242d4a24-2484-4a80-8234-d0169409c5e8" +def_hostid="242d4a24-2484-4a80-8234-d0169409c5e8" nvme_trtype=${nvme_trtype:-"loop"} nvme_img_size=${nvme_img_size:-"1G"} nvme_num_iter=${nvme_num_iter:-"1000"} @@ -161,6 +161,50 @@ _nvme_calc_rand_io_size() { echo "${io_size_kb}k" } +_nvme_cli_supports_context() { + if ! nvme connect --help 2>&1 | grep -q context > /dev/null; then + return 1 + fi + return 0 +} + +_setup_nvme_cli() { + if ! _nvme_cli_supports_context; then + return + fi + + mkdir -p /run/nvme + cat >> /run/nvme/blktests.json <<-EOF + [ + { + "hostnqn": "${def_hostnqn}", + "hostid": "${def_hostid}", + "subsystems": [ + { + "application": "blktests", + "nqn": "blktests-subsystem-1", + "ports": [ + { + "transport": "fc", + "traddr": "nn-${def_remote_wwnn}:pn-${def_remote_wwpn}", + "host_traddr": "nn-${def_local_wwnn}:pn-${def_local_wwpn}" + } + ] + } + ] + } + ] + EOF +} + +_cleanup_nvme_cli() { + if ! _nvme_cli_supports_context; then + return + fi + + rm -f /run/nvme/blktests.json +} + _nvme_fcloop_add_rport() { local local_wwnn="$1" local local_wwpn="$2" @@ -235,6 +279,8 @@ _cleanup_fcloop() { _nvme_fcloop_del_lport "${local_wwnn}" "${local_wwpn}" _nvme_fcloop_del_rport "${local_wwnn}" "${local_wwpn}" \ "${remote_wwnn}" "${remote_wwpn}" + + _cleanup_nvme_cli } _cleanup_nvmet() { @@ -337,6 +383,8 @@ _setup_nvmet() { def_host_traddr=$(printf "nn-%s:pn-%s" \ "${def_local_wwnn}" \ "${def_local_wwpn}") + + _setup_nvme_cli fi } @@ -436,18 +484,18 @@ _nvme_connect_subsys() { trtype="$1" subsysnqn="$2" - ARGS=(-t "${trtype}" -n "${subsysnqn}") + ARGS=(-t "${trtype}" + -n "${subsysnqn}" + --hostnqn="${hostnqn}" + --hostid="${hostid}") + if _nvme_cli_supports_context; then + ARGS+=(--context="blktests") + fi if [[ "${trtype}" == "fc" ]] ; then ARGS+=(-a "${traddr}" -w "${host_traddr}") elif [[ "${trtype}" != "loop" ]]; then ARGS+=(-a "${traddr}" -s "${trsvcid}") fi - if [[ "${hostnqn}" != "$def_hostnqn" ]]; then - ARGS+=(--hostnqn="${hostnqn}") - fi - if [[ "${hostid}" != "$def_hostid" ]]; then - ARGS+=(--hostid="${hostid}") - fi if [[ -n "${hostkey}" ]]; then ARGS+=(--dhchap-secret="${hostkey}") fi @@ -482,7 +530,12 @@ _nvme_discover() { local host_traddr="${3:-$def_host_traddr}" local trsvcid="${3:-$def_trsvcid}" - ARGS=(-t "${trtype}") + ARGS=(-t "${trtype}" + --hostnqn="${def_hostnqn}" + --hostid="${def_hostid}") + if _nvme_cli_supports_context; then + ARGS+=(--context="blktests") + fi if [[ "${trtype}" = "fc" ]]; then ARGS+=(-a "${traddr}" -w "${host_traddr}") elif [[ "${trtype}" != "loop" ]]; then
When the host has enabled the udev/systemd autoconnect services for the fc transport it interacts with blktests and make tests break. nvme-cli learned to ignore connects attemps when using the --context command line option paired with a volatile configuration. Thus we can mark all the resources created by blktests and avoid any interaction with the systemd autoconnect scripts. Signed-off-by: Daniel Wagner <dwagner@suse.de> --- tests/nvme/rc | 73 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 10 deletions(-)