diff mbox series

[for-rc] RDMA/core: Fix panic when port_data isn't initialized

Message ID 20190523071251.7931-1-kamalheib1@gmail.com (mailing list archive)
State Accepted
Delegated to: Jason Gunthorpe
Headers show
Series [for-rc] RDMA/core: Fix panic when port_data isn't initialized | expand

Commit Message

Kamal Heib May 23, 2019, 7:12 a.m. UTC
This happens if assign_name() return failure when called from
ib_register_device(), that will lead to the following panic in every
time that someone touches the port_data's data members.

All the functions that touch the port_data during teardown are called
from ib_device_release() except free_netdevs() that called from multiple
contexts, that's why the check had to be in both of them.

BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0
PGD 0 P4D 0
Oops: 0002 [#1] SMP PTI
CPU: 19 PID: 1994 Comm: systemd-udevd Not tainted 5.1.0-rc5+ #1
Hardware name: HP ProLiant DL360p Gen8, BIOS P71 12/20/2013
RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40
Code: 85 ff 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 9c 58 66 66 90
66 90 48 89 c3 fa 66 66 90 66 66 90 31 c0 ba 01 00 00 00 <f0> 0f b1 17 0f
94 c2 84 d2 74 05 48 89 d8 5b c3 89 c6 e8 b4 85 8a
RSP: 0018:ffffa8d7079a7c08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000202 RCX: ffffa8d7079a7bf8
RDX: 0000000000000001 RSI: ffff93607c990000 RDI: 00000000000000c0
RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffc08c4dd8
R10: 0000000000000000 R11: 0000000000000001 R12: 00000000000000c0
R13: ffff93607c990000 R14: ffffffffc05a9740 R15: ffffa8d7079a7e98
FS:  00007f1c6ee438c0(0000) GS:ffff93609f6c0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000c0 CR3: 0000000819fca002 CR4: 00000000000606e0
Call Trace:
 free_netdevs+0x4d/0xe0 [ib_core]
 ib_dealloc_device+0x51/0xb0 [ib_core]
 __mlx5_ib_add+0x5e/0x70 [mlx5_ib]
 mlx5_add_device+0x57/0xe0 [mlx5_core]
 mlx5_register_interface+0x85/0xc0 [mlx5_core]
 ? 0xffffffffc0474000
 do_one_initcall+0x4e/0x1d4
 ? _cond_resched+0x15/0x30
 ? kmem_cache_alloc_trace+0x15f/0x1c0
 do_init_module+0x5a/0x218
 load_module+0x186b/0x1e40
 ? m_show+0x1c0/0x1c0
 __do_sys_finit_module+0x94/0xe0
 do_syscall_64+0x5b/0x180
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 8ceb1357b337 ("RDMA/device: Consolidate ib_device per_port data into one place")
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 drivers/infiniband/core/device.c | 6 ++++++
 1 file changed, 6 insertions(+)

Comments

Jason Gunthorpe May 27, 2019, 5:53 p.m. UTC | #1
On Thu, May 23, 2019 at 10:12:51AM +0300, Kamal Heib wrote:
> This happens if assign_name() return failure when called from
> ib_register_device(), that will lead to the following panic in every
> time that someone touches the port_data's data members.
> 
> All the functions that touch the port_data during teardown are called
> from ib_device_release() except free_netdevs() that called from multiple
> contexts, that's why the check had to be in both of them.
> 
> BUG: unable to handle kernel NULL pointer dereference at 00000000000000c0
> PGD 0 P4D 0
> Oops: 0002 [#1] SMP PTI
> CPU: 19 PID: 1994 Comm: systemd-udevd Not tainted 5.1.0-rc5+ #1
> Hardware name: HP ProLiant DL360p Gen8, BIOS P71 12/20/2013
> RIP: 0010:_raw_spin_lock_irqsave+0x1e/0x40
> Code: 85 ff 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 9c 58 66 66 90
> 66 90 48 89 c3 fa 66 66 90 66 66 90 31 c0 ba 01 00 00 00 <f0> 0f b1 17 0f
> 94 c2 84 d2 74 05 48 89 d8 5b c3 89 c6 e8 b4 85 8a
> RSP: 0018:ffffa8d7079a7c08 EFLAGS: 00010046
> RAX: 0000000000000000 RBX: 0000000000000202 RCX: ffffa8d7079a7bf8
> RDX: 0000000000000001 RSI: ffff93607c990000 RDI: 00000000000000c0
> RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffc08c4dd8
> R10: 0000000000000000 R11: 0000000000000001 R12: 00000000000000c0
> R13: ffff93607c990000 R14: ffffffffc05a9740 R15: ffffa8d7079a7e98
> FS:  00007f1c6ee438c0(0000) GS:ffff93609f6c0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000000000c0 CR3: 0000000819fca002 CR4: 00000000000606e0
> Call Trace:
>  free_netdevs+0x4d/0xe0 [ib_core]
>  ib_dealloc_device+0x51/0xb0 [ib_core]
>  __mlx5_ib_add+0x5e/0x70 [mlx5_ib]
>  mlx5_add_device+0x57/0xe0 [mlx5_core]
>  mlx5_register_interface+0x85/0xc0 [mlx5_core]
>  ? 0xffffffffc0474000
>  do_one_initcall+0x4e/0x1d4
>  ? _cond_resched+0x15/0x30
>  ? kmem_cache_alloc_trace+0x15f/0x1c0
>  do_init_module+0x5a/0x218
>  load_module+0x186b/0x1e40
>  ? m_show+0x1c0/0x1c0
>  __do_sys_finit_module+0x94/0xe0
>  do_syscall_64+0x5b/0x180
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Fixes: 8ceb1357b337 ("RDMA/device: Consolidate ib_device per_port data into one place")
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
>  drivers/infiniband/core/device.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
> index 78dc07c6ac4b..788bd56b6694 100644
> +++ b/drivers/infiniband/core/device.c
> @@ -472,6 +472,9 @@ static void ib_device_release(struct device *device)
>  {
>  	struct ib_device *dev = container_of(device, struct ib_device, dev);
>  
> +	if (!dev->port_data)
> +		return;
> +

This skips the kfree on the device though. I fixed it like this, and
applied to for-rc

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index cd6b679badfe49..29f7b15c81d946 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -491,14 +491,15 @@ static void ib_device_release(struct device *device)
 
 	free_netdevs(dev);
 	WARN_ON(refcount_read(&dev->refcount));
-	ib_cache_release_one(dev);
-	ib_security_release_port_pkey_list(dev);
-	xa_destroy(&dev->compat_devs);
-	xa_destroy(&dev->client_data);
-	if (dev->port_data)
+	if (dev->port_data) {
+		ib_cache_release_one(dev);
+		ib_security_release_port_pkey_list(dev);
 		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
 				       pdata[0]),
 			  rcu_head);
+	}
+	xa_destroy(&dev->compat_devs);
+	xa_destroy(&dev->client_data);
 	kfree_rcu(dev, rcu_head);
 }
 
@@ -1952,6 +1953,9 @@ static void free_netdevs(struct ib_device *ib_dev)
 	unsigned long flags;
 	unsigned int port;
 
+	if (!ib_dev->port_data)
+		return;
+
 	rdma_for_each_port (ib_dev, port) {
 		struct ib_port_data *pdata = &ib_dev->port_data[port];
 		struct net_device *ndev;
diff mbox series

Patch

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 78dc07c6ac4b..788bd56b6694 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -472,6 +472,9 @@  static void ib_device_release(struct device *device)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
 
+	if (!dev->port_data)
+		return;
+
 	free_netdevs(dev);
 	WARN_ON(refcount_read(&dev->refcount));
 	ib_cache_release_one(dev);
@@ -1935,6 +1938,9 @@  static void free_netdevs(struct ib_device *ib_dev)
 	unsigned long flags;
 	unsigned int port;
 
+	if (!ib_dev->port_data)
+		return;
+
 	rdma_for_each_port (ib_dev, port) {
 		struct ib_port_data *pdata = &ib_dev->port_data[port];
 		struct net_device *ndev;