nvmeof rdma regression issue on 4.14.0-rc1 (or maybe mlx4?)

Message ID 4faccebf-7fcf-58b9-1605-82ee9acc652b@grimberg.me (mailing list archive)
State Deferred

Commit Message

Sagi Grimberg Sept. 25, 2017, 7:06 a.m. UTC
> Hi Sagi
> I did a git bisect for this issue; it seems it was introduced by your
> patch series "Few more patches from the centralization set".
> Here are the test results for the patches; let me know if you need more info.

Thanks for reporting.

> BAD  148b4e7 nvme-rdma: stop queues instead of simply flipping their state
> BAD  a57bd54 nvme-rdma: introduce configure/destroy io queues
> Log:
> [  127.899255] nvme nvme0: new ctrl: NQN 
> "nqn.2014-08.org.nvmexpress.discovery", addr 172.31.0.90:4420
> [  128.074263] nvme nvme0: creating 40 I/O queues.
> [  128.581822] nvme nvme0: new ctrl: NQN "testnqn", addr 172.31.0.90:4420
> [  177.486110] print_req_error: I/O error, dev nvme0n1, sector 0
> [  191.256637] nvme nvme0: Reconnecting in 10 seconds...
> [  201.855846] nvme nvme0: Connect rejected: status 8 (invalid service ID).
> [  201.863353] nvme nvme0: rdma_resolve_addr wait failed (-104).
> [  201.869824] nvme nvme0: Failed reconnect attempt 1
> [  201.875183] nvme nvme0: Reconnecting in 10 seconds...
> [  212.087828] nvme nvme0: Connect rejected: status 8 (invalid service ID).
> [  212.095330] nvme nvme0: rdma_resolve_addr wait failed (-104).
> [  212.101766] nvme nvme0: Failed reconnect attempt 2
> [  212.107129] nvme nvme0: Reconnecting in 10 seconds...
> [  222.328398] nvme nvme0: Connect rejected: status 8 (invalid service ID).
> [  222.335900] nvme nvme0: rdma_resolve_addr wait failed (-104).
> [  222.342335] nvme nvme0: Failed reconnect attempt 3
> [  222.347699] nvme nvme0: Reconnecting in 10 seconds...
> [  232.567791] nvme nvme0: Connect rejected: status 8 (invalid service ID).
> [  232.575292] nvme nvme0: rdma_resolve_addr wait failed (-104).
> [  232.581730] nvme nvme0: Failed reconnect attempt 4
> [  232.587094] nvme nvme0: Reconnecting in 10 seconds...
> [  242.827727] nvme nvme0: creating 40 I/O queues.
> [  242.832810] DMAR: ERROR: DMA PTE for vPFN 0xe129b already set (to 
> 103c692002 not 1000915003)
> [  242.842265] ------------[ cut here ]------------
> [  242.847437] WARNING: CPU: 0 PID: 783 at 
> drivers/iommu/intel-iommu.c:2299 __domain_mapping+0x363/0x370
> [  242.857755] Modules linked in: nvme_rdma nvme_fabrics nvme_core 
> sch_mqprio ebtable_filter ebtables ip6table_filter ip6_tables 
> iptable_filter bridge 8021q garp mrp stp llc rpcrdma ib_isert 
> iscsi_target_mod ibd
> [  242.936919]  mgag200 i2c_algo_bit drm_kms_helper syscopyarea 
> sysfillrect sysimgblt fb_sys_fops ttm drm mlx4_core tg3 ahci libahci 
> crc32c_intel ptp libata i2c_core pps_core devlink dm_mirror 
> dm_region_hash dmd
> [  242.958625] CPU: 0 PID: 783 Comm: kworker/u368:1 Not tainted 
> 4.13.0-rc7.a57bd54+ #15
> [  242.967304] Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 
> 1.6.2 01/08/2016
> [  242.975687] Workqueue: ib_addr process_one_req [ib_core]
> [  242.981631] task: ffff881019491740 task.stack: ffffc9000b534000
> [  242.989011] RIP: 0010:__domain_mapping+0x363/0x370
> [  242.995108] RSP: 0018:ffffc9000b537a50 EFLAGS: 00010202
> [  243.001694] RAX: 0000000000000004 RBX: 0000001000915003 RCX: 
> 0000000000000000
> [  243.010433] RDX: 0000000000000000 RSI: ffff88103e60e038 RDI: 
> ffff88103e60e038
> [  243.019170] RBP: ffffc9000b537ab0 R08: 0000000000000000 R09: 
> 0000000000000000
> [  243.027893] R10: 00000000000002f7 R11: 0000000001000915 R12: 
> ffff88201ea5c4d8
> [  243.036632] R13: 0000000000000001 R14: 0000000000000001 R15: 
> 00000000000e129b
> [  243.045348] FS:  0000000000000000(0000) GS:ffff88103e600000(0000) 
> knlGS:0000000000000000
> [  243.055142] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  243.062337] CR2: 00007f9d3b0fab70 CR3: 00000020245b7000 CR4: 
> 00000000001406f0
> [  243.071076] Call Trace:
> [  243.074564]  __intel_map_single+0xeb/0x180
> [  243.079903]  intel_alloc_coherent+0xb5/0x130
> [  243.085445]  mlx4_buf_alloc+0xe5/0x1c0 [mlx4_core]
> [  243.091555]  mlx4_ib_alloc_cq_buf.isra.9+0x38/0xd0 [mlx4_ib]
> [  243.098621]  mlx4_ib_create_cq+0x223/0x440 [mlx4_ib]
> [  243.104901]  ? find_gid.isra.5+0x167/0x1f0 [ib_core]
> [  243.111178]  ib_alloc_cq+0x49/0x170 [ib_core]
> [  243.116791]  nvme_rdma_cm_handler+0x3e7/0x886 [nvme_rdma]
> [  243.123557]  ? cma_attach_to_dev+0x17/0x50 [rdma_cm]
> [  243.129838]  ? cma_acquire_dev+0x1e3/0x3b0 [rdma_cm]
> [  243.136115]  ? account_entity_dequeue+0xaa/0xe0
> [  243.141918]  addr_handler+0xa4/0x1c0 [rdma_cm]
> [  243.147604]  process_one_req+0x8d/0x120 [ib_core]
> [  243.153585]  process_one_work+0x149/0x360
> [  243.158807]  worker_thread+0x4d/0x3c0
> [  243.163618]  kthread+0x109/0x140
> [  243.167936]  ? rescuer_thread+0x380/0x380
> [  243.173131]  ? kthread_park+0x60/0x60
> [  243.177930]  ret_from_fork+0x25/0x30

I'm not able to reproduce this.

I still don't understand how we call ib_alloc_cq and end up with an
already-mapped buffer; it's calling dma_alloc_coherent.

Can you try the following patch and report what's going on?
--
                 goto requeue;
@@ -938,7 +934,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
         if (ctrl->ctrl.queue_count > 1) {
                 ret = nvme_rdma_configure_io_queues(ctrl, false);
                 if (ret)
-                       goto requeue;
+                       goto destroy_admin;
         }

         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -956,6 +952,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)

         return;

+destroy_admin:
+       nvme_rdma_destroy_admin_queue(ctrl, false);
  requeue:
         dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
                         ctrl->ctrl.nr_reconnects);
@@ -990,6 +988,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
         blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
         nvme_start_queues(&ctrl->ctrl);

+       if (ctrl->ctrl.queue_count > 1)
+               nvme_rdma_destroy_io_queues(ctrl, false);
+       nvme_rdma_destroy_admin_queue(ctrl, false);
+
         nvme_rdma_reconnect_or_remove(ctrl);
  }
--

I guess we could see weird phenomena if we have a resource leak,
but I ran kmemleak and could not get anything in this area...
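
(For reference, a minimal sketch of an on-demand kmemleak scan, assuming
CONFIG_DEBUG_KMEMLEAK=y and debugfs mounted at /sys/kernel/debug:)

$ mount -t debugfs nodev /sys/kernel/debug   # skip if debugfs is already mounted
$ echo scan > /sys/kernel/debug/kmemleak     # trigger an immediate scan
$ cat /sys/kernel/debug/kmemleak             # dump any suspected leaks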

> Panic after connection with the commits below; detailed log here:
> https://pastebin.com/7z0XSGSd
> 31fdf18 nvme-rdma: reuse configure/destroy_admin_queue
> 3f02fff nvme-rdma: don't free tagset on resets
> 18398af nvme-rdma: disable the controller on resets
> b28a308 nvme-rdma: move tagset allocation to a dedicated routine
>
> good 34b6c23 nvme: Add admin_tagset pointer to nvme_ctrl

Is that a reproducible panic? I'm not seeing this at all.

Can you run gdb on nvme-rdma.ko and list the faulting address?
$ gdb nvme-rdma.ko
(gdb) l *(nvme_rdma_create_ctrl+0x37d)

Comments

Yi Zhang Sept. 25, 2017, 11:01 a.m. UTC | #1
Hi Sagi,
Please see my comments below.
> I'm not able to reproduce this.
>
I've reproduced this on the two environments below, and the target side I
used is null_blk (a minimal sketch of that kind of target setup follows
the lspci listings).
[root@rdma-virt-00 ~]$ lspci | grep -i mel
04:00.0 Network controller: Mellanox Technologies MT27520 Family 
[ConnectX-3 Pro]
04:00.1 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.2 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.3 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.4 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.5 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.6 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
04:00.7 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.0 Network controller: Mellanox Technologies MT27520 Family 
[ConnectX-3 Pro]
06:00.1 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.2 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.3 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.4 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.5 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.6 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]
06:00.7 Network controller: Mellanox Technologies MT27500/MT27520 Family 
[ConnectX-3/ConnectX-3 Pro Virtual Function]

[root@rdma-virt-03 ~]$ lspci | grep -i mel
04:00.0 Infiniband controller: Mellanox Technologies MT27700 Family 
[ConnectX-4]
04:00.1 Infiniband controller: Mellanox Technologies MT27700 Family 
[ConnectX-4]
05:00.0 Ethernet controller: Mellanox Technologies MT27710 Family 
[ConnectX-4 Lx]
05:00.1 Ethernet controller: Mellanox Technologies MT27710 Family 
[ConnectX-4 Lx]
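
(The null_blk target mentioned above was set up through the standard nvmet
configfs interface; this is a minimal sketch, where /dev/nullb0 as the
backing device is an assumption — the NQN, address, and port are taken
from the logs in this thread:)

$ modprobe null_blk
$ modprobe nvmet-rdma
$ cd /sys/kernel/config/nvmet
$ mkdir subsystems/testnqn
$ echo 1 > subsystems/testnqn/attr_allow_any_host
$ mkdir subsystems/testnqn/namespaces/1
$ echo -n /dev/nullb0 > subsystems/testnqn/namespaces/1/device_path
$ echo 1 > subsystems/testnqn/namespaces/1/enable
$ mkdir ports/1
$ echo rdma > ports/1/addr_trtype
$ echo ipv4 > ports/1/addr_adrfam
$ echo 172.31.0.90 > ports/1/addr_traddr
$ echo 4420 > ports/1/addr_trsvcid
$ ln -s /sys/kernel/config/nvmet/subsystems/testnqn ports/1/subsystems/testnqn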


> I still don't understand how we call ib_alloc_cq and end up with an
> already-mapped buffer; it's calling dma_alloc_coherent.
>
> Can you try the following patch and report what's going on?
> -- 
>
Here is the log. I didn't get the error this time, but the nvme0n1 device
node also doesn't exist on the host.
[  144.993253] nvme nvme0: new ctrl: NQN 
"nqn.2014-08.org.nvmexpress.discovery", addr 172.31.0.90:4420
[  145.177205] nvme nvme0: creating 40 I/O queues.
[  145.711199] nvme nvme0: new ctrl: NQN "testnqn", addr 172.31.0.90:4420
[  156.482770] nvme nvme0: Reconnecting in 10 seconds...
[  166.881540] nvme nvme0: Connect rejected: status 8 (invalid service ID).
[  166.889043] nvme nvme0: rdma_resolve_addr wait failed (-104).
[  166.895478] nvme nvme0: Failed reconnect attempt 1
[  166.900840] nvme nvme0: Reconnecting in 10 seconds...
[  177.120933] nvme nvme0: Connect rejected: status 8 (invalid service ID).
[  177.128434] nvme nvme0: rdma_resolve_addr wait failed (-104).
[  177.134866] nvme nvme0: Failed reconnect attempt 2
[  177.140227] nvme nvme0: Reconnecting in 10 seconds...
[  187.360819] nvme nvme0: Connect rejected: status 8 (invalid service ID).
[  187.368321] nvme nvme0: rdma_resolve_addr wait failed (-104).
[  187.374752] nvme nvme0: Failed reconnect attempt 3
[  187.380113] nvme nvme0: Reconnecting in 10 seconds...
[  197.620302] nvme nvme0: creating 40 I/O queues.
[  198.227963] nvme nvme0: Successfully reconnected
[  198.228046] nvme nvme0: identifiers changed for nsid 1

[root@rdma-virt-01 ~]$ lsblk
NAME                           MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
sda                              8:0    0 465.8G  0 disk
├─sda2                           8:2    0 464.8G  0 part
│ ├─rhelaa_rdma--virt--01-swap 253:1    0     4G  0 lvm  [SWAP]
│ ├─rhelaa_rdma--virt--01-home 253:2    0 410.8G  0 lvm  /home
│ └─rhelaa_rdma--virt--01-root 253:0    0    50G  0 lvm  /
└─sda1                           8:1    0     1G  0 part /boot

> -- 
>
> I guess we could see weird phenomena if we have a resource leak,
> but I ran kmemleak and could not get anything in this area...
>
>> Panic after connection with the commits below; detailed log here:
>> https://pastebin.com/7z0XSGSd
>> 31fdf18 nvme-rdma: reuse configure/destroy_admin_queue
>> 3f02fff nvme-rdma: don't free tagset on resets
>> 18398af nvme-rdma: disable the controller on resets
>> b28a308 nvme-rdma: move tagset allocation to a dedicated routine
>>
>> good 34b6c23 nvme: Add admin_tagset pointer to nvme_ctrl
>
> Is that a reproducible panic? I'm not seeing this at all.
>

Yes, I can reproduce it every time. The target side kernel version was
4.14.0-rc1 when the panic occurred.

> Can you run gdb on nvme-rdma.ko and list the faulting address?
> $ gdb nvme-rdma.ko
> (gdb) l *(nvme_rdma_create_ctrl+0x37d)
>
[root@rdma-virt-01 linux ((31fdf18...))]$ gdb /usr/lib/modules/4.13.0-rc7.31fdf18+/kernel/drivers/nvme/host/nvme-rdma.ko
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later 
<http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from /usr/lib/modules/4.13.0-rc7.31fdf18+/kernel/drivers/nvme/host/nvme-rdma.ko...done.
(gdb) l *(nvme_rdma_create_ctrl+0x37d)
0x297d is in nvme_rdma_create_ctrl (drivers/nvme/host/rdma.c:656).
651        struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
652        struct blk_mq_tag_set *set = admin ?
653                &ctrl->admin_tag_set : &ctrl->tag_set;
654
655        blk_mq_free_tag_set(set);
656        nvme_rdma_dev_put(ctrl->device);
657    }
658
659    static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
660            bool admin)
(gdb)

Sagi Grimberg Sept. 25, 2017, 11:15 a.m. UTC | #2
> Here is the log. I didn't get the error this time, but the nvme0n1 device
> node also doesn't exist on the host.

Maybe that's because the identifiers changed? Did you have nvme0n2 instead?
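
(A quick way to check which namespace nodes actually exist on the host;
`nvme list` assumes nvme-cli is installed:)

$ ls /dev/nvme*
$ nvme list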

> Yes, I can reproduce every time. And the target side kernel version is 
> 4.14.0-rc1 during the panic occurred.

What is the host side kernel version? It doesn't look like 4.14.0-rc1.

Patch

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 92a03ff5fb4d..3befaa0c53ff 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -927,10 +927,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)

         ++ctrl->ctrl.nr_reconnects;

-       if (ctrl->ctrl.queue_count > 1)
-               nvme_rdma_destroy_io_queues(ctrl, false);
-
-       nvme_rdma_destroy_admin_queue(ctrl, false);
         ret = nvme_rdma_configure_admin_queue(ctrl, false);
         if (ret)