diff mbox

[v3] IB/IPoIB: ibX: failed to create mcg debug file

Message ID 1490599139-12665-1-git-send-email-shamir.rabinovitch@oracle.com (mailing list archive)
State Superseded
Headers show

Commit Message

Shamir Rabinovitch March 27, 2017, 7:18 a.m. UTC
When udev renames the netdev devices, ipoib debugfs entries does not
get renamed. As a result, if subsequent probe of ipoib device reuse the
name then creating a debugfs entry for the new device would fail.

Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
of ipoib event handling in order to avoid any race condition between these.

Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>

---
v2->v3:
- Move rev change out of commit message
- Fix typo
v1->v2:
- Review comment from Mark Bloch <markb@mellanox.com>
  Verified again and NETDEV_UNREGISTER is called correctly. Debug
  files are removed as expected. Thanks.
- Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
---

---
 drivers/infiniband/ulp/ipoib/ipoib.h      |    1 +
 drivers/infiniband/ulp/ipoib/ipoib_fs.c   |   28 +++++++++++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   41 ++++++++++++++++++++++++++--
 3 files changed, 67 insertions(+), 3 deletions(-)

Comments

Mark Bloch March 27, 2017, 3:06 p.m. UTC | #1
Hi Shamir,

Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
We are calling in unconditionally in: ipoib_dev_cleanup
and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.

For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
So I have two ipoib interfaces (ib0, ib1)

When I load and unload mlx5_ib (while ib_ipoib is loaded:

root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
^C
FUNC                                    COUNT
ipoib_create_debug_files                    2
ipoib_delete_debug_files                    4
Detaching...

Why not just remove the call in ipoib_dev_cleanup?

On 27/03/2017 10:18, Shamir Rabinovitch wrote:
> When udev renames the netdev devices, ipoib debugfs entries does not 
> get renamed. As a result, if subsequent probe of ipoib device reuse
> the name then creating a debugfs entry for the new device would
> fail.
> 
> Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as
> part of ipoib event handling in order to avoid any race condition
> between these.
> 
> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com> Signed-off-by:
> Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
> 
> --- v2->v3: - Move rev change out of commit message - Fix typo 
> v1->v2: - Review comment from Mark Bloch <markb@mellanox.com> 
> Verified again and NETDEV_UNREGISTER is called correctly. Debug files
> are removed as expected. Thanks. - Fix compile warning when
> CONFIG_INFINIBAND_IPOIB_DEBUG is not set ---
> 
> --- drivers/infiniband/ulp/ipoib/ipoib.h      |    1 + 
> drivers/infiniband/ulp/ipoib/ipoib_fs.c   |   28 +++++++++++++++++++ 
> drivers/infiniband/ulp/ipoib/ipoib_main.c |   41
> ++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 3
> deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h
> b/drivers/infiniband/ulp/ipoib/ipoib.h index da12717..95a457f 100644 
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++
> b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -770,6 +770,7 @@ static
> inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct
> ib_wc *w void ipoib_delete_debug_files(struct net_device *dev); int
> ipoib_register_debugfs(void); void ipoib_unregister_debugfs(void); 
> +void ipoib_debugfs_rename(struct net_device *dev); #else static
> inline void ipoib_create_debug_files(struct net_device *dev) { } 
> static inline void ipoib_delete_debug_files(struct net_device *dev) {
> } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 6bd5740..d849b1d
> 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++
> b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -259,6 +259,34 @@ static
> int ipoib_path_open(struct inode *inode, struct file *file) .release
> = seq_release };
> 
> +void ipoib_debugfs_rename(struct net_device *dev) +{ +	struct
> ipoib_dev_priv *priv = netdev_priv(dev); +	char name[IFNAMSIZ +
> sizeof "_path"]; + +	if (!ipoib_root) +		return; + +	if
> (priv->mcg_dentry) { +		snprintf(name, sizeof(name), "%s_mcg",
> dev->name); +		priv->mcg_dentry = debugfs_rename(ipoib_root,
> priv->mcg_dentry, +						  ipoib_root, name); +		if
> (!priv->mcg_dentry) +			ipoib_warn(priv, "failed to rename debug file
> %s to %s\n", +				   priv->mcg_dentry->d_iname, name); +	} + +	if
> (priv->path_dentry) { +		snprintf(name, sizeof(name), "%s_path",
> dev->name); +		priv->path_dentry = debugfs_rename(ipoib_root, +
> priv->path_dentry, +						   ipoib_root, name); +		if
> (!priv->path_dentry) +			ipoib_warn(priv, "failed to rename debug
> file %s to %s\n", +				   priv->mcg_dentry->d_iname, name); +	} +} + 
> void ipoib_create_debug_files(struct net_device *dev) { struct
> ipoib_dev_priv *priv = netdev_priv(dev); diff --git
> a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> b/drivers/infiniband/ulp/ipoib/ipoib_main.c index b58d9dc..c84b8ee
> 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++
> b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,6 +108,32 @@
> struct ipoib_path_iter { .get_net_dev_by_params =
> ipoib_get_net_dev_by_params, };
> 
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static int
> ipoib_netdev_event(struct notifier_block *this, +			      unsigned
> long event, void *ptr) +{ +	struct netdev_notifier_info *ni = ptr; +
> struct net_device *dev = ni->dev; + +	if (dev->netdev_ops->ndo_open
> != ipoib_open) +		return NOTIFY_DONE; + +	switch (event) { +	case
> NETDEV_REGISTER: +		ipoib_create_debug_files(dev); +		break; +	case
> NETDEV_CHANGENAME: +		ipoib_debugfs_rename(dev); +		break; +	case
> NETDEV_UNREGISTER: +		ipoib_delete_debug_files(dev); +		break; +	} + 
> +	return NOTIFY_DONE; +} +#endif + int ipoib_open(struct net_device
> *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -2072,8
> +2098,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv,
> struct ib_device *hca) goto register_failed; }
> 
> -	ipoib_create_debug_files(priv->dev); - if
> (ipoib_cm_add_mode_attr(priv->dev)) goto sysfs_failed; if
> (ipoib_add_pkey_attr(priv->dev)) @@ -2088,7 +2112,6 @@ int
> ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device
> *hca) return priv->dev;
> 
> sysfs_failed: -	ipoib_delete_debug_files(priv->dev); 
> unregister_netdev(priv->dev);
> 
> register_failed: @@ -2173,6 +2196,12 @@ static void
> ipoib_remove_one(struct ib_device *device, void *client_data) 
> kfree(dev_list); }
> 
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +static struct notifier_block
> ipoib_netdev_notifier = { +	.notifier_call = ipoib_netdev_event, +}; 
> +#endif + static int __init ipoib_init_module(void) { int ret; @@
> -2225,6 +2254,9 @@ static int __init ipoib_init_module(void) if
> (ret) goto err_client;
> 
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG +
> register_netdevice_notifier(&ipoib_netdev_notifier); +#endif return
> 0;
> 
> err_client: @@ -2242,6 +2274,9 @@ static int __init
> ipoib_init_module(void)
> 
> static void __exit ipoib_cleanup_module(void) { +#ifdef
> CONFIG_INFINIBAND_IPOIB_DEBUG +
> unregister_netdevice_notifier(&ipoib_netdev_notifier); +#endif 
> ipoib_netlink_fini(); ib_unregister_client(&ipoib_client); 
> ib_sa_unregister_client(&ipoib_sa_client);
> 

Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky March 27, 2017, 7:55 p.m. UTC | #2
On Mon, Mar 27, 2017 at 03:18:59AM -0400, Shamir Rabinovitch wrote:
> When udev renames the netdev devices, ipoib debugfs entries does not
> get renamed. As a result, if subsequent probe of ipoib device reuse the
> name then creating a debugfs entry for the new device would fail.
>
> Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
> of ipoib event handling in order to avoid any race condition between these.
>
> Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
> Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
>
> ---
> v2->v3:
> - Move rev change out of commit message
> - Fix typo
> v1->v2:
> - Review comment from Mark Bloch <markb@mellanox.com>
>   Verified again and NETDEV_UNREGISTER is called correctly. Debug
>   files are removed as expected. Thanks.
> - Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
> ---
>
> ---
>  drivers/infiniband/ulp/ipoib/ipoib.h      |    1 +
>  drivers/infiniband/ulp/ipoib/ipoib_fs.c   |   28 +++++++++++++++++++
>  drivers/infiniband/ulp/ipoib/ipoib_main.c |   41 ++++++++++++++++++++++++++--
>  3 files changed, 67 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
> index da12717..95a457f 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib.h
> +++ b/drivers/infiniband/ulp/ipoib/ipoib.h
> @@ -770,6 +770,7 @@ static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *w
>  void ipoib_delete_debug_files(struct net_device *dev);
>  int ipoib_register_debugfs(void);
>  void ipoib_unregister_debugfs(void);
> +void ipoib_debugfs_rename(struct net_device *dev);
>  #else
>  static inline void ipoib_create_debug_files(struct net_device *dev) { }
>  static inline void ipoib_delete_debug_files(struct net_device *dev) { }
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> index 6bd5740..d849b1d 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
> @@ -259,6 +259,34 @@ static int ipoib_path_open(struct inode *inode, struct file *file)
>  	.release = seq_release
>  };
>
> +void ipoib_debugfs_rename(struct net_device *dev)
> +{
> +	struct ipoib_dev_priv *priv = netdev_priv(dev);
> +	char name[IFNAMSIZ + sizeof "_path"];
> +
> +	if (!ipoib_root)
> +		return;
> +
> +	if (priv->mcg_dentry) {
> +		snprintf(name, sizeof(name), "%s_mcg", dev->name);
> +		priv->mcg_dentry = debugfs_rename(ipoib_root, priv->mcg_dentry,
> +						  ipoib_root, name);
> +		if (!priv->mcg_dentry)
> +			ipoib_warn(priv, "failed to rename debug file %s to %s\n",
> +				   priv->mcg_dentry->d_iname, name);
> +	}
> +
> +	if (priv->path_dentry) {
> +		snprintf(name, sizeof(name), "%s_path", dev->name);
> +		priv->path_dentry = debugfs_rename(ipoib_root,
> +						   priv->path_dentry,
> +						   ipoib_root, name);
> +		if (!priv->path_dentry)
> +			ipoib_warn(priv, "failed to rename debug file %s to %s\n",
> +				   priv->mcg_dentry->d_iname, name);
> +	}
> +}
> +
>  void ipoib_create_debug_files(struct net_device *dev)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> index b58d9dc..c84b8ee 100644
> --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
> +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
> @@ -108,6 +108,32 @@ struct ipoib_path_iter {
>  	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
>  };
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +static int ipoib_netdev_event(struct notifier_block *this,
> +			      unsigned long event, void *ptr)
> +{
> +	struct netdev_notifier_info *ni = ptr;
> +	struct net_device *dev = ni->dev;
> +
> +	if (dev->netdev_ops->ndo_open != ipoib_open)
> +		return NOTIFY_DONE;
> +
> +	switch (event) {
> +	case NETDEV_REGISTER:
> +		ipoib_create_debug_files(dev);
> +		break;
> +	case NETDEV_CHANGENAME:
> +		ipoib_debugfs_rename(dev);

Why do we need explicit ipoib_debugfs_rename function?
Will it work by simply calling ipoib_delete_debug_files
and immediately after that ipoib_create_debug_files?


> +		break;
> +	case NETDEV_UNREGISTER:
> +		ipoib_delete_debug_files(dev);
> +		break;
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +#endif
> +
>  int ipoib_open(struct net_device *dev)
>  {
>  	struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -2072,8 +2098,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
>  		goto register_failed;
>  	}
>
> -	ipoib_create_debug_files(priv->dev);
> -
>  	if (ipoib_cm_add_mode_attr(priv->dev))
>  		goto sysfs_failed;
>  	if (ipoib_add_pkey_attr(priv->dev))
> @@ -2088,7 +2112,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
>  	return priv->dev;
>
>  sysfs_failed:
> -	ipoib_delete_debug_files(priv->dev);
>  	unregister_netdev(priv->dev);
>
>  register_failed:
> @@ -2173,6 +2196,12 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
>  	kfree(dev_list);
>  }
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +static struct notifier_block ipoib_netdev_notifier = {
> +	.notifier_call = ipoib_netdev_event,
> +};
> +#endif
> +
>  static int __init ipoib_init_module(void)
>  {
>  	int ret;
> @@ -2225,6 +2254,9 @@ static int __init ipoib_init_module(void)
>  	if (ret)
>  		goto err_client;
>
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +	register_netdevice_notifier(&ipoib_netdev_notifier);
> +#endif
>  	return 0;
>
>  err_client:
> @@ -2242,6 +2274,9 @@ static int __init ipoib_init_module(void)
>
>  static void __exit ipoib_cleanup_module(void)
>  {
> +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> +	unregister_netdevice_notifier(&ipoib_netdev_notifier);
> +#endif
>  	ipoib_netlink_fini();
>  	ib_unregister_client(&ipoib_client);
>  	ib_sa_unregister_client(&ipoib_sa_client);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shamir Rabinovitch March 27, 2017, 8:11 p.m. UTC | #3
On Mon, Mar 27, 2017 at 06:06:42PM +0300, Mark Bloch wrote:
> Hi Shamir,
> 
> Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
> We are calling in unconditionally in: ipoib_dev_cleanup
> and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.
> 
> For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
> So I have two ipoib interfaces (ib0, ib1)
> 
> When I load and unload mlx5_ib (while ib_ipoib is loaded:
> 
> root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
> Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
> ^C
> FUNC                                    COUNT
> ipoib_create_debug_files                    2
> ipoib_delete_debug_files                    4
> Detaching...
> 
> Why not just remove the call in ipoib_dev_cleanup?
> 

Hi Mark,

v3 of this patch works fine on system that has CX3 with 2 ports and the
below udev rules:

# InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"

On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
chaos in the ipoib device names.

The attached logs include the information collected when the openibd
service was started and when it was stopped. You can have a look in the
files and see what are the network events and how they are processed by
the ipoib devices.

I think it will answer your concerns. 

BR, Shamir
mlx4_core: unknown parameter 'module_unload_allowed' ignored
mlx4_core: Mellanox ConnectX core driver v2.2-1 (Feb, 2014)
mlx4_core: Initializing 0002:01:00.0
PCI: Enabling device: (0002:01:00.0), cmd 2
mlx4_core 0002:01:00.0: Old device ETS support detected
mlx4_core 0002:01:00.0: Consider upgrading device FW.
mlx4_core 0002:01:00.0: PCIe link speed is 8.0GT/s, device supports 8.0GT/s
mlx4_core 0002:01:00.0: PCIe link width is x8, device supports x8
mlx4_core: Initializing 0006:01:00.0
PCI: Enabling device: (0006:01:00.0), cmd 2
mlx4_core 0006:01:00.0: Old device ETS support detected
mlx4_core 0006:01:00.0: Consider upgrading device FW.
mlx4_core 0006:01:00.0: PCIe link speed is 8.0GT/s, device supports 8.0GT/s
mlx4_core 0006:01:00.0: PCIe link width is x8, device supports x8
<mlx4_ib> mlx4_ib_add: mlx4_ib: Mellanox ConnectX InfiniBand driver v2.2-1 (Feb 2014)
<mlx4_ib> mlx4_ib_add: counter index 0 for port 1 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 1 for port 2 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 0 for port 1 allocated 0
<mlx4_ib> mlx4_ib_add: counter index 1 for port 2 allocated 0
ib_ipoib: unknown parameter 'module_unload_allowed' ignored
ipoib_netdev_event: dev fff8001f59984000 name ib0 event 0x5
ipoib_netdev_event: dev fff8001f568b4000 name ib1 event 0x5
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x5
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x5
mlx4_core 0002:01:00.0 rename57: renamed from ib1
ipoib_netdev_event: dev fff8001f568b4000 name rename57 event 0xa
mlx4_core 0002:01:00.0 rename56: renamed from ib0
ipoib_netdev_event: dev fff8001f59984000 name rename56 event 0xa
mlx4_core 0002:01:00.0 ib0: renamed from rename57
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0xa
mlx4_core 0002:01:00.0 ib1: renamed from rename56
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0xa
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x17
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x7
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x17
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x7
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib2: link is not ready
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x1
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib3: link is not ready
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x1
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x17
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x7
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x17
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x7
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib1: link is not ready
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x1
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0xd
IPv6: ADDRCONF(NETDEV_UP): ib0: link is not ready
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x1
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
IPv6: ADDRCONF(NETDEV_CHANGE): ib3: link becomes ready
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
Kernel unaligned access at TPC[107ea098] ipoib_dev_addr_changed_valid+0x58/0x1c0 [ib_ipoib]
IPv6: ADDRCONF(NETDEV_CHANGE): ib1: link becomes ready
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x4
IPv6: ADDRCONF(NETDEV_CHANGE): ib0: link becomes ready
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x4
IPv6: ADDRCONF(NETDEV_CHANGE): ib2: link becomes ready
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x4
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x4
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x9
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x2
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x9
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x2
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x9
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x2
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x9
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x2
ipoib_netdev_event: dev fff8001f59984000 name ib1 event 0x6
ipoib_netdev_event: dev fff8001f568b4000 name ib0 event 0x6
ipoib_netdev_event: dev fff8001f57b4a000 name ib2 event 0x6
ipoib_netdev_event: dev fff8001f54dda000 name ib3 event 0x6
Shamir Rabinovitch March 27, 2017, 8:17 p.m. UTC | #4
On Mon, Mar 27, 2017 at 10:55:00PM +0300, Leon Romanovsky wrote:
> On Mon, Mar 27, 2017 at 03:18:59AM -0400, Shamir Rabinovitch wrote:
> > When udev renames the netdev devices, ipoib debugfs entries does not
> > get renamed. As a result, if subsequent probe of ipoib device reuse the
> > name then creating a debugfs entry for the new device would fail.
> >
> > Also, moved ipoib_create_debug_files and ipoib_delete_debug_files as part
> > of ipoib event handling in order to avoid any race condition between these.
> >
> > Signed-off-by: Vijay Kumar <vijay.ac.kumar@oracle.com>
> > Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
> >
> > ---
> > v2->v3:
> > - Move rev change out of commit message
> > - Fix typo
> > v1->v2:
> > - Review comment from Mark Bloch <markb@mellanox.com>
> >   Verified again and NETDEV_UNREGISTER is called correctly. Debug
> >   files are removed as expected. Thanks.
> > - Fix compile warning when CONFIG_INFINIBAND_IPOIB_DEBUG is not set
> > ---
> >

[...]

> >
> > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > +static int ipoib_netdev_event(struct notifier_block *this,
> > +			      unsigned long event, void *ptr)
> > +{
> > +	struct netdev_notifier_info *ni = ptr;
> > +	struct net_device *dev = ni->dev;
> > +
> > +	if (dev->netdev_ops->ndo_open != ipoib_open)
> > +		return NOTIFY_DONE;
> > +
> > +	switch (event) {
> > +	case NETDEV_REGISTER:
> > +		ipoib_create_debug_files(dev);
> > +		break;
> > +	case NETDEV_CHANGENAME:
> > +		ipoib_debugfs_rename(dev);
> 
> Why do we need explicit ipoib_debugfs_rename function?
> Will it work by simply calling ipoib_delete_debug_files
> and immediately after that ipoib_create_debug_files?
> 
> 
> > +		break;
> > +	case NETDEV_UNREGISTER:
> > +		ipoib_delete_debug_files(dev);
> > +		break;
> > +	}
> > +
> > +	return NOTIFY_DONE;
> > +}
> > +#endif
> > +

Hi Leon, 

Good point. 
I will have look on this idea and get back to you.

BR, Shamir
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Shamir Rabinovitch March 28, 2017, 9:19 a.m. UTC | #5
On Mon, Mar 27, 2017 at 11:17:14PM +0300, Shamir Rabinovitch wrote:
> 
> > >
> > > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > > +static int ipoib_netdev_event(struct notifier_block *this,
> > > +			      unsigned long event, void *ptr)
> > > +{
> > > +	struct netdev_notifier_info *ni = ptr;
> > > +	struct net_device *dev = ni->dev;
> > > +
> > > +	if (dev->netdev_ops->ndo_open != ipoib_open)
> > > +		return NOTIFY_DONE;
> > > +
> > > +	switch (event) {
> > > +	case NETDEV_REGISTER:
> > > +		ipoib_create_debug_files(dev);
> > > +		break;
> > > +	case NETDEV_CHANGENAME:
> > > +		ipoib_debugfs_rename(dev);
> > 
> > Why do we need explicit ipoib_debugfs_rename function?
> > Will it work by simply calling ipoib_delete_debug_files
> > and immediately after that ipoib_create_debug_files?
> > 
> > 
> > > +		break;
> > > +	case NETDEV_UNREGISTER:
> > > +		ipoib_delete_debug_files(dev);
> > > +		break;
> > > +	}
> > > +
> > > +	return NOTIFY_DONE;
> > > +}
> > > +#endif
> > > +
> 
> Hi Leon, 
> 
> Good point. 
> I will have look on this idea and get back to you.
> 
> BR, Shamir

Hi Leon,

It seems that the difference between using debugfs_rename and a
combination of debugfs_create_file and debugfs_remove is mainly the
atomicy of the rename operation with regard to the file system.
debugfs_rename is atomic operation while the above combination is not.
As result I see kernel panic when the rename operation interleave with
the delete due to module unload. So after carefully considering what you
suggest I think it might introduce unexpected issues.

BR, Shamir  
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Bloch March 28, 2017, 3:45 p.m. UTC | #6
Hi Shamir,

On 27/03/2017 23:11, Shamir Rabinovitch wrote:
> On Mon, Mar 27, 2017 at 06:06:42PM +0300, Mark Bloch wrote:
>> Hi Shamir,
>>
>> Like I've said in v1 of this patch, I believe we are calling ipoib_delete_debug_files too many times.
>> We are calling in unconditionally in: ipoib_dev_cleanup
>> and also in ipoib_netdev_event when we get an NETDEV_UNREGISTER event.
>>
>> For example, I have a setup with ConnectX-4 dual port configured to be in IB mode.
>> So I have two ipoib interfaces (ib0, ib1)
>>
>> When I load and unload mlx5_ib (while ib_ipoib is loaded:
>>
>> root@dev-r-vrt-175 tools]# ./funccount.py 'ipoib_*_debug_files'
>> Tracing 2 functions for "ipoib_*_debug_files"... Hit Ctrl-C to end.
>> ^C
>> FUNC                                    COUNT
>> ipoib_create_debug_files                    2
>> ipoib_delete_debug_files                    4
>> Detaching...
>>
>> Why not just remove the call in ipoib_dev_cleanup?
>>
> 
> Hi Mark,
> 
> v3 of this patch works fine on system that has CX3 with 2 ports and the
> below udev rules:
> 
> # InfiniBand: Mellanox Technologies MT27500 Family [ConnectX-3]
> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> ID=="0002:01:00.0", ATTR{dev_id}=="0x0", KERNEL=="ib*", NAME="ib1"
> SUBSYSTEM=="net", ACTION=="add", DRIVERS=="mlx4_core", BUS=="pci",
> ID=="0002:01:00.0", ATTR{dev_id}=="0x1", KERNEL=="ib*", NAME="ib0"
> 
> On this system, the udev rules rename ib0-ib1 & ib1->ib0 causing small
> chaos in the ipoib device names.
> 
> The attached logs include the information collected when the openibd
> service was started and when it was stopped. You can have a look in the
> files and see what are the network events and how they are processed by
> the ipoib devices.
> 
> I think it will answer your concerns. 
> 
> BR, Shamir
> 

I'm not saying it doesn't work, I'm saying works != works correctly.
We are calling ipoib_delete_debug_file too many times, it works by luck/chance.

While testing the patch, I've encountered another issue, running:

modprobe ib_ipoib
echo "0x0043" > /sys/class/net/ib0/create_child
modprobe -r ib_ipoib

and then looking the at the debugfs dir:
[root@dev-r-vrt-175 ~]# ls /sys/kernel/debug/ipoib/
ib0.8043_mcg  ib0.8043_pat1

As you can see the the debugfs entries for the ib0 child weren't removed.
Also notice that after that, I can't load ib_ipoib
[root@dev-r-vrt-175 ~]# modprobe ib_ipoib
modprobe: ERROR: could not insert 'ib_ipoib': Cannot allocate memory

The more interesting issue is, dmesg output has this:
[  467.185609] ib0.8043: failed to create mcg debug file
[  467.192551] ib0.8043: failed to create path debug file

so maybe this is a debugfs bug?

Sorry I can't look into it, I have some internal stuff I need to work on :/

Mark.
 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky March 28, 2017, 5:05 p.m. UTC | #7
On Tue, Mar 28, 2017 at 12:19:41PM +0300, Shamir Rabinovitch wrote:
> On Mon, Mar 27, 2017 at 11:17:14PM +0300, Shamir Rabinovitch wrote:
> >
> > > >
> > > > +#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
> > > > +static int ipoib_netdev_event(struct notifier_block *this,
> > > > +			      unsigned long event, void *ptr)
> > > > +{
> > > > +	struct netdev_notifier_info *ni = ptr;
> > > > +	struct net_device *dev = ni->dev;
> > > > +
> > > > +	if (dev->netdev_ops->ndo_open != ipoib_open)
> > > > +		return NOTIFY_DONE;
> > > > +
> > > > +	switch (event) {
> > > > +	case NETDEV_REGISTER:
> > > > +		ipoib_create_debug_files(dev);
> > > > +		break;
> > > > +	case NETDEV_CHANGENAME:
> > > > +		ipoib_debugfs_rename(dev);
> > >
> > > Why do we need explicit ipoib_debugfs_rename function?
> > > Will it work by simply calling ipoib_delete_debug_files
> > > and immediately after that ipoib_create_debug_files?
> > >
> > >
> > > > +		break;
> > > > +	case NETDEV_UNREGISTER:
> > > > +		ipoib_delete_debug_files(dev);
> > > > +		break;
> > > > +	}
> > > > +
> > > > +	return NOTIFY_DONE;
> > > > +}
> > > > +#endif
> > > > +
> >
> > Hi Leon,
> >
> > Good point.
> > I will have look on this idea and get back to you.
> >
> > BR, Shamir
>
> Hi Leon,
>
> It seems that the difference between using debugfs_rename and a
> combination of debugfs_create_file and debugfs_remove is mainly the
> atomicy of the rename operation with regard to the file system.
> debugfs_rename is atomic operation while the above combination is not.
> As result I see kernel panic when the rename operation interleave with
> the delete due to module unload. So after carefully considering what you
> suggest I think it might introduce unexpected issues.

Thank you for checking it.

There is one more thing which I noticed, you should check the return
values of debugfs_remove and destroy debugfs in case of failures.

>
> BR, Shamir
Shamir Rabinovitch March 29, 2017, 10:20 a.m. UTC | #8
On Tue, Mar 28, 2017 at 08:05:06PM +0300, Leon Romanovsky wrote:
> 
> Thank you for checking it.
> 
> There is one more thing which I noticed, you should check the return
> values of debugfs_remove and destroy debugfs in case of failures.
> 

Hi Leon, 

'debugfs_remove' prototype is "static inline void
debugfs_remove(struct dentry *dentry)". 
If you typed the wrong function please let me know.

BR, Shamir 

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky March 29, 2017, 11:21 a.m. UTC | #9
On Wed, Mar 29, 2017 at 01:20:20PM +0300, Shamir Rabinovitch wrote:
> On Tue, Mar 28, 2017 at 08:05:06PM +0300, Leon Romanovsky wrote:
> >
> > Thank you for checking it.
> >
> > There is one more thing which I noticed, you should check the return
> > values of debugfs_remove and destroy debugfs in case of failures.
> >
>
> Hi Leon,
>
> 'debugfs_remove' prototype is "static inline void
> debugfs_remove(struct dentry *dentry)".
> If you typed the wrong function please let me know.

Sorry, it was my fault, I mistyped and intended to write debugfs_rename
which you used in previous version of your patch.

 linux-rdma git:(master) git grep debugfs_rename
 ....
fs/debugfs/inode.c: * debugfs_rename - rename a file/directory in the debugfs filesystem
fs/debugfs/inode.c:struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
fs/debugfs/inode.c:EXPORT_SYMBOL_GPL(debugfs_rename);
 ....

>
> BR, Shamir
>
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717..95a457f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -770,6 +770,7 @@  static inline void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *w
 void ipoib_delete_debug_files(struct net_device *dev);
 int ipoib_register_debugfs(void);
 void ipoib_unregister_debugfs(void);
+void ipoib_debugfs_rename(struct net_device *dev);
 #else
 static inline void ipoib_create_debug_files(struct net_device *dev) { }
 static inline void ipoib_delete_debug_files(struct net_device *dev) { }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 6bd5740..d849b1d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -259,6 +259,34 @@  static int ipoib_path_open(struct inode *inode, struct file *file)
 	.release = seq_release
 };
 
+void ipoib_debugfs_rename(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	char name[IFNAMSIZ + sizeof "_path"];
+
+	if (!ipoib_root)
+		return;
+
+	if (priv->mcg_dentry) {
+		snprintf(name, sizeof(name), "%s_mcg", dev->name);
+		priv->mcg_dentry = debugfs_rename(ipoib_root, priv->mcg_dentry,
+						  ipoib_root, name);
+		if (!priv->mcg_dentry)
+			ipoib_warn(priv, "failed to rename debug file %s to %s\n",
+				   priv->mcg_dentry->d_iname, name);
+	}
+
+	if (priv->path_dentry) {
+		snprintf(name, sizeof(name), "%s_path", dev->name);
+		priv->path_dentry = debugfs_rename(ipoib_root,
+						   priv->path_dentry,
+						   ipoib_root, name);
+		if (!priv->path_dentry)
+			ipoib_warn(priv, "failed to rename debug file %s to %s\n",
+				   priv->mcg_dentry->d_iname, name);
+	}
+}
+
 void ipoib_create_debug_files(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b58d9dc..c84b8ee 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,6 +108,32 @@  struct ipoib_path_iter {
 	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
 };
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static int ipoib_netdev_event(struct notifier_block *this,
+			      unsigned long event, void *ptr)
+{
+	struct netdev_notifier_info *ni = ptr;
+	struct net_device *dev = ni->dev;
+
+	if (dev->netdev_ops->ndo_open != ipoib_open)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		ipoib_create_debug_files(dev);
+		break;
+	case NETDEV_CHANGENAME:
+		ipoib_debugfs_rename(dev);
+		break;
+	case NETDEV_UNREGISTER:
+		ipoib_delete_debug_files(dev);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+#endif
+
 int ipoib_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -2072,8 +2098,6 @@  int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 		goto register_failed;
 	}
 
-	ipoib_create_debug_files(priv->dev);
-
 	if (ipoib_cm_add_mode_attr(priv->dev))
 		goto sysfs_failed;
 	if (ipoib_add_pkey_attr(priv->dev))
@@ -2088,7 +2112,6 @@  int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
 	return priv->dev;
 
 sysfs_failed:
-	ipoib_delete_debug_files(priv->dev);
 	unregister_netdev(priv->dev);
 
 register_failed:
@@ -2173,6 +2196,12 @@  static void ipoib_remove_one(struct ib_device *device, void *client_data)
 	kfree(dev_list);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+static struct notifier_block ipoib_netdev_notifier = {
+	.notifier_call = ipoib_netdev_event,
+};
+#endif
+
 static int __init ipoib_init_module(void)
 {
 	int ret;
@@ -2225,6 +2254,9 @@  static int __init ipoib_init_module(void)
 	if (ret)
 		goto err_client;
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+	register_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
 	return 0;
 
 err_client:
@@ -2242,6 +2274,9 @@  static int __init ipoib_init_module(void)
 
 static void __exit ipoib_cleanup_module(void)
 {
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+	unregister_netdevice_notifier(&ipoib_netdev_notifier);
+#endif
 	ipoib_netlink_fini();
 	ib_unregister_client(&ipoib_client);
 	ib_sa_unregister_client(&ipoib_sa_client);