diff mbox series

[net-next,02/11] mlxsw: core_linecards: Introduce per line card auxiliary device

Message ID 20220614123326.69745-3-jiri@resnulli.us (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Implement dev info and dev flash for line cards | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jiri Pirko June 14, 2022, 12:33 p.m. UTC
From: Jiri Pirko <jiri@nvidia.com>

In order to be eventually able to expose line card gearbox version and
possibility to flash FW, model the line card as a separate device on
auxiliary bus.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig   |   1 +
 drivers/net/ethernet/mellanox/mlxsw/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlxsw/core.c    |  13 +-
 drivers/net/ethernet/mellanox/mlxsw/core.h    |  10 ++
 .../mellanox/mlxsw/core_linecard_dev.c        | 152 ++++++++++++++++++
 .../ethernet/mellanox/mlxsw/core_linecards.c  |  10 ++
 6 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c

Comments

Ido Schimmel June 15, 2022, 2:52 p.m. UTC | #1
> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
> +{
> +	struct mlxsw_linecard_bdev *linecard_bdev;
> +	int err;
> +	int id;
> +
> +	id = mlxsw_linecard_bdev_id_alloc();
> +	if (id < 0)
> +		return id;
> +
> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
> +	if (!linecard_bdev) {
> +		mlxsw_linecard_bdev_id_free(id);
> +		return -ENOMEM;
> +	}
> +	linecard_bdev->adev.id = id;
> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
> +	linecard_bdev->linecard = linecard;
> +
> +	err = auxiliary_device_init(&linecard_bdev->adev);
> +	if (err) {
> +		mlxsw_linecard_bdev_id_free(id);
> +		kfree(linecard_bdev);
> +		return err;
> +	}
> +
> +	err = auxiliary_device_add(&linecard_bdev->adev);
> +	if (err) {
> +		auxiliary_device_uninit(&linecard_bdev->adev);
> +		return err;
> +	}
> +
> +	linecard->bdev = linecard_bdev;
> +	return 0;
> +}

[...]

> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
> +				     const struct auxiliary_device_id *id)
> +{
> +	struct mlxsw_linecard_bdev *linecard_bdev =
> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
> +	struct mlxsw_linecard_dev *linecard_dev;
> +	struct devlink *devlink;
> +
> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
> +				sizeof(*linecard_dev), &adev->dev);
> +	if (!devlink)
> +		return -ENOMEM;
> +	linecard_dev = devlink_priv(devlink);
> +	linecard_dev->linecard = linecard_bdev->linecard;
> +	linecard_bdev->linecard_dev = linecard_dev;
> +
> +	devlink_register(devlink);
> +	return 0;
> +}

[...]

> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
>  	linecard->provisioned = true;
>  	linecard->hw_revision = hw_revision;
>  	linecard->ini_version = ini_version;
> +
> +	err = mlxsw_linecard_bdev_add(linecard);

If a line card is already provisioned and we are reloading the primary
devlink instance, isn't this going to deadlock on the global (not
per-instance) devlink mutex? It is held throughout the reload operation
and also taken in devlink_register()

My understanding of the auxiliary bus model is that after adding a
device to the bus via auxiliary_device_add(), the probe() function of
the auxiliary driver will be called. In our case, this function acquires
the global devlink mutex in devlink_register().

> +	if (err) {
> +		linecard->provisioned = false;
> +		mlxsw_linecard_provision_fail(linecard);
> +		return err;
> +	}
> +
>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
>  	return 0;
>  }
Jiri Pirko June 15, 2022, 5:37 p.m. UTC | #2
Wed, Jun 15, 2022 at 04:52:13PM CEST, idosch@nvidia.com wrote:
>> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
>> +{
>> +	struct mlxsw_linecard_bdev *linecard_bdev;
>> +	int err;
>> +	int id;
>> +
>> +	id = mlxsw_linecard_bdev_id_alloc();
>> +	if (id < 0)
>> +		return id;
>> +
>> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
>> +	if (!linecard_bdev) {
>> +		mlxsw_linecard_bdev_id_free(id);
>> +		return -ENOMEM;
>> +	}
>> +	linecard_bdev->adev.id = id;
>> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
>> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
>> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
>> +	linecard_bdev->linecard = linecard;
>> +
>> +	err = auxiliary_device_init(&linecard_bdev->adev);
>> +	if (err) {
>> +		mlxsw_linecard_bdev_id_free(id);
>> +		kfree(linecard_bdev);
>> +		return err;
>> +	}
>> +
>> +	err = auxiliary_device_add(&linecard_bdev->adev);
>> +	if (err) {
>> +		auxiliary_device_uninit(&linecard_bdev->adev);
>> +		return err;
>> +	}
>> +
>> +	linecard->bdev = linecard_bdev;
>> +	return 0;
>> +}
>
>[...]
>
>> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
>> +				     const struct auxiliary_device_id *id)
>> +{
>> +	struct mlxsw_linecard_bdev *linecard_bdev =
>> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
>> +	struct mlxsw_linecard_dev *linecard_dev;
>> +	struct devlink *devlink;
>> +
>> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
>> +				sizeof(*linecard_dev), &adev->dev);
>> +	if (!devlink)
>> +		return -ENOMEM;
>> +	linecard_dev = devlink_priv(devlink);
>> +	linecard_dev->linecard = linecard_bdev->linecard;
>> +	linecard_bdev->linecard_dev = linecard_dev;
>> +
>> +	devlink_register(devlink);
>> +	return 0;
>> +}
>
>[...]
>
>> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
>>  	linecard->provisioned = true;
>>  	linecard->hw_revision = hw_revision;
>>  	linecard->ini_version = ini_version;
>> +
>> +	err = mlxsw_linecard_bdev_add(linecard);
>
>If a line card is already provisioned and we are reloading the primary
>devlink instance, isn't this going to deadlock on the global (not
>per-instance) devlink mutex? It is held throughout the reload operation
>and also taken in devlink_register()
>
>My understanding of the auxiliary bus model is that after adding a
>device to the bus via auxiliary_device_add(), the probe() function of
>the auxiliary driver will be called. In our case, this function acquires
>the global devlink mutex in devlink_register().

No, the line card auxdev is supposed to be removed during
linecard_fini(). This, I forgot to add, will do in v2.


>
>> +	if (err) {
>> +		linecard->provisioned = false;
>> +		mlxsw_linecard_provision_fail(linecard);
>> +		return err;
>> +	}
>> +
>>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
>>  	return 0;
>>  }
Ido Schimmel June 16, 2022, 7:11 a.m. UTC | #3
On Wed, Jun 15, 2022 at 07:37:15PM +0200, Jiri Pirko wrote:
> Wed, Jun 15, 2022 at 04:52:13PM CEST, idosch@nvidia.com wrote:
> >> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
> >> +{
> >> +	struct mlxsw_linecard_bdev *linecard_bdev;
> >> +	int err;
> >> +	int id;
> >> +
> >> +	id = mlxsw_linecard_bdev_id_alloc();
> >> +	if (id < 0)
> >> +		return id;
> >> +
> >> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
> >> +	if (!linecard_bdev) {
> >> +		mlxsw_linecard_bdev_id_free(id);
> >> +		return -ENOMEM;
> >> +	}
> >> +	linecard_bdev->adev.id = id;
> >> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
> >> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
> >> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
> >> +	linecard_bdev->linecard = linecard;
> >> +
> >> +	err = auxiliary_device_init(&linecard_bdev->adev);
> >> +	if (err) {
> >> +		mlxsw_linecard_bdev_id_free(id);
> >> +		kfree(linecard_bdev);
> >> +		return err;
> >> +	}
> >> +
> >> +	err = auxiliary_device_add(&linecard_bdev->adev);
> >> +	if (err) {
> >> +		auxiliary_device_uninit(&linecard_bdev->adev);
> >> +		return err;
> >> +	}
> >> +
> >> +	linecard->bdev = linecard_bdev;
> >> +	return 0;
> >> +}
> >
> >[...]
> >
> >> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
> >> +				     const struct auxiliary_device_id *id)
> >> +{
> >> +	struct mlxsw_linecard_bdev *linecard_bdev =
> >> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
> >> +	struct mlxsw_linecard_dev *linecard_dev;
> >> +	struct devlink *devlink;
> >> +
> >> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
> >> +				sizeof(*linecard_dev), &adev->dev);
> >> +	if (!devlink)
> >> +		return -ENOMEM;
> >> +	linecard_dev = devlink_priv(devlink);
> >> +	linecard_dev->linecard = linecard_bdev->linecard;
> >> +	linecard_bdev->linecard_dev = linecard_dev;
> >> +
> >> +	devlink_register(devlink);
> >> +	return 0;
> >> +}
> >
> >[...]
> >
> >> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
> >>  	linecard->provisioned = true;
> >>  	linecard->hw_revision = hw_revision;
> >>  	linecard->ini_version = ini_version;
> >> +
> >> +	err = mlxsw_linecard_bdev_add(linecard);
> >
> >If a line card is already provisioned and we are reloading the primary
> >devlink instance, isn't this going to deadlock on the global (not
> >per-instance) devlink mutex? It is held throughout the reload operation
> >and also taken in devlink_register()
> >
> >My understanding of the auxiliary bus model is that after adding a
> >device to the bus via auxiliary_device_add(), the probe() function of
> >the auxiliary driver will be called. In our case, this function acquires
> >the global devlink mutex in devlink_register().
> 
> No, the line card auxdev is supposed to be removed during
> linecard_fini(). This, I forgot to add, will do in v2.

mlxsw_linecard_fini() is called as part of reload with the global
devlink mutex held. The removal of the auxdev should prompt the
unregistration of its devlink instance which also takes this mutex. If
this doesn't deadlock, then I'm probably missing something.

Can you test reload with lockdep when line cards are already
provisioned/active?

> 
> 
> >
> >> +	if (err) {
> >> +		linecard->provisioned = false;
> >> +		mlxsw_linecard_provision_fail(linecard);
> >> +		return err;
> >> +	}
> >> +
> >>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
> >>  	return 0;
> >>  }
Jiri Pirko June 16, 2022, 4:39 p.m. UTC | #4
Thu, Jun 16, 2022 at 09:11:56AM CEST, idosch@nvidia.com wrote:
>On Wed, Jun 15, 2022 at 07:37:15PM +0200, Jiri Pirko wrote:
>> Wed, Jun 15, 2022 at 04:52:13PM CEST, idosch@nvidia.com wrote:
>> >> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
>> >> +{
>> >> +	struct mlxsw_linecard_bdev *linecard_bdev;
>> >> +	int err;
>> >> +	int id;
>> >> +
>> >> +	id = mlxsw_linecard_bdev_id_alloc();
>> >> +	if (id < 0)
>> >> +		return id;
>> >> +
>> >> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
>> >> +	if (!linecard_bdev) {
>> >> +		mlxsw_linecard_bdev_id_free(id);
>> >> +		return -ENOMEM;
>> >> +	}
>> >> +	linecard_bdev->adev.id = id;
>> >> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
>> >> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
>> >> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
>> >> +	linecard_bdev->linecard = linecard;
>> >> +
>> >> +	err = auxiliary_device_init(&linecard_bdev->adev);
>> >> +	if (err) {
>> >> +		mlxsw_linecard_bdev_id_free(id);
>> >> +		kfree(linecard_bdev);
>> >> +		return err;
>> >> +	}
>> >> +
>> >> +	err = auxiliary_device_add(&linecard_bdev->adev);
>> >> +	if (err) {
>> >> +		auxiliary_device_uninit(&linecard_bdev->adev);
>> >> +		return err;
>> >> +	}
>> >> +
>> >> +	linecard->bdev = linecard_bdev;
>> >> +	return 0;
>> >> +}
>> >
>> >[...]
>> >
>> >> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
>> >> +				     const struct auxiliary_device_id *id)
>> >> +{
>> >> +	struct mlxsw_linecard_bdev *linecard_bdev =
>> >> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
>> >> +	struct mlxsw_linecard_dev *linecard_dev;
>> >> +	struct devlink *devlink;
>> >> +
>> >> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
>> >> +				sizeof(*linecard_dev), &adev->dev);
>> >> +	if (!devlink)
>> >> +		return -ENOMEM;
>> >> +	linecard_dev = devlink_priv(devlink);
>> >> +	linecard_dev->linecard = linecard_bdev->linecard;
>> >> +	linecard_bdev->linecard_dev = linecard_dev;
>> >> +
>> >> +	devlink_register(devlink);
>> >> +	return 0;
>> >> +}
>> >
>> >[...]
>> >
>> >> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
>> >>  	linecard->provisioned = true;
>> >>  	linecard->hw_revision = hw_revision;
>> >>  	linecard->ini_version = ini_version;
>> >> +
>> >> +	err = mlxsw_linecard_bdev_add(linecard);
>> >
>> >If a line card is already provisioned and we are reloading the primary
>> >devlink instance, isn't this going to deadlock on the global (not
>> >per-instance) devlink mutex? It is held throughout the reload operation
>> >and also taken in devlink_register()
>> >
>> >My understanding of the auxiliary bus model is that after adding a
>> >device to the bus via auxiliary_device_add(), the probe() function of
>> >the auxiliary driver will be called. In our case, this function acquires
>> >the global devlink mutex in devlink_register().
>> 
>> No, the line card auxdev is supposed to be removed during
>> linecard_fini(). This, I forgot to add, will do in v2.
>
>mlxsw_linecard_fini() is called as part of reload with the global
>devlink mutex held. The removal of the auxdev should prompt the
>unregistration of its devlink instance which also takes this mutex. If
>this doesn't deadlock, then I'm probably missing something.

You don't miss anything, it really does. Need to remove devlink_mutex
first.


>
>Can you test reload with lockdep when line cards are already
>provisioned/active?
>
>> 
>> 
>> >
>> >> +	if (err) {
>> >> +		linecard->provisioned = false;
>> >> +		mlxsw_linecard_provision_fail(linecard);
>> >> +		return err;
>> >> +	}
>> >> +
>> >>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
>> >>  	return 0;
>> >>  }
Ido Schimmel June 16, 2022, 5:41 p.m. UTC | #5
On Thu, Jun 16, 2022 at 06:39:59PM +0200, Jiri Pirko wrote:
> Thu, Jun 16, 2022 at 09:11:56AM CEST, idosch@nvidia.com wrote:
> >On Wed, Jun 15, 2022 at 07:37:15PM +0200, Jiri Pirko wrote:
> >> Wed, Jun 15, 2022 at 04:52:13PM CEST, idosch@nvidia.com wrote:
> >> >> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
> >> >> +{
> >> >> +	struct mlxsw_linecard_bdev *linecard_bdev;
> >> >> +	int err;
> >> >> +	int id;
> >> >> +
> >> >> +	id = mlxsw_linecard_bdev_id_alloc();
> >> >> +	if (id < 0)
> >> >> +		return id;
> >> >> +
> >> >> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
> >> >> +	if (!linecard_bdev) {
> >> >> +		mlxsw_linecard_bdev_id_free(id);
> >> >> +		return -ENOMEM;
> >> >> +	}
> >> >> +	linecard_bdev->adev.id = id;
> >> >> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
> >> >> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
> >> >> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
> >> >> +	linecard_bdev->linecard = linecard;
> >> >> +
> >> >> +	err = auxiliary_device_init(&linecard_bdev->adev);
> >> >> +	if (err) {
> >> >> +		mlxsw_linecard_bdev_id_free(id);
> >> >> +		kfree(linecard_bdev);
> >> >> +		return err;
> >> >> +	}
> >> >> +
> >> >> +	err = auxiliary_device_add(&linecard_bdev->adev);
> >> >> +	if (err) {
> >> >> +		auxiliary_device_uninit(&linecard_bdev->adev);
> >> >> +		return err;
> >> >> +	}
> >> >> +
> >> >> +	linecard->bdev = linecard_bdev;
> >> >> +	return 0;
> >> >> +}
> >> >
> >> >[...]
> >> >
> >> >> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
> >> >> +				     const struct auxiliary_device_id *id)
> >> >> +{
> >> >> +	struct mlxsw_linecard_bdev *linecard_bdev =
> >> >> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
> >> >> +	struct mlxsw_linecard_dev *linecard_dev;
> >> >> +	struct devlink *devlink;
> >> >> +
> >> >> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
> >> >> +				sizeof(*linecard_dev), &adev->dev);
> >> >> +	if (!devlink)
> >> >> +		return -ENOMEM;
> >> >> +	linecard_dev = devlink_priv(devlink);
> >> >> +	linecard_dev->linecard = linecard_bdev->linecard;
> >> >> +	linecard_bdev->linecard_dev = linecard_dev;
> >> >> +
> >> >> +	devlink_register(devlink);
> >> >> +	return 0;
> >> >> +}
> >> >
> >> >[...]
> >> >
> >> >> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
> >> >>  	linecard->provisioned = true;
> >> >>  	linecard->hw_revision = hw_revision;
> >> >>  	linecard->ini_version = ini_version;
> >> >> +
> >> >> +	err = mlxsw_linecard_bdev_add(linecard);
> >> >
> >> >If a line card is already provisioned and we are reloading the primary
> >> >devlink instance, isn't this going to deadlock on the global (not
> >> >per-instance) devlink mutex? It is held throughout the reload operation
> >> >and also taken in devlink_register()
> >> >
> >> >My understanding of the auxiliary bus model is that after adding a
> >> >device to the bus via auxiliary_device_add(), the probe() function of
> >> >the auxiliary driver will be called. In our case, this function acquires
> >> >the global devlink mutex in devlink_register().
> >> 
> >> No, the line card auxdev is supposed to be removed during
> >> linecard_fini(). This, I forgot to add, will do in v2.
> >
> >mlxsw_linecard_fini() is called as part of reload with the global
> >devlink mutex held. The removal of the auxdev should prompt the
> >unregistration of its devlink instance which also takes this mutex. If
> >this doesn't deadlock, then I'm probably missing something.
> 
> You don't miss anything, it really does. Need to remove devlink_mutex
> first.

Can you please send it separately? Will probably need thorough review
and testing...

The comment above devlink_mutex is: "An overall lock guarding every
operation coming from userspace. It also guards devlink devices list and
it is taken when driver registers/unregisters it.", but devlink does not
have "parallel_ops" enabled, so maybe it's enough to only use this lock
to protect the devlink devices list?

> 
> 
> >
> >Can you test reload with lockdep when line cards are already
> >provisioned/active?
> >
> >> 
> >> 
> >> >
> >> >> +	if (err) {
> >> >> +		linecard->provisioned = false;
> >> >> +		mlxsw_linecard_provision_fail(linecard);
> >> >> +		return err;
> >> >> +	}
> >> >> +
> >> >>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
> >> >>  	return 0;
> >> >>  }
Jiri Pirko June 27, 2022, 8:34 a.m. UTC | #6
Thu, Jun 16, 2022 at 07:41:52PM CEST, idosch@nvidia.com wrote:
>On Thu, Jun 16, 2022 at 06:39:59PM +0200, Jiri Pirko wrote:
>> Thu, Jun 16, 2022 at 09:11:56AM CEST, idosch@nvidia.com wrote:
>> >On Wed, Jun 15, 2022 at 07:37:15PM +0200, Jiri Pirko wrote:
>> >> Wed, Jun 15, 2022 at 04:52:13PM CEST, idosch@nvidia.com wrote:
>> >> >> +int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
>> >> >> +{
>> >> >> +	struct mlxsw_linecard_bdev *linecard_bdev;
>> >> >> +	int err;
>> >> >> +	int id;
>> >> >> +
>> >> >> +	id = mlxsw_linecard_bdev_id_alloc();
>> >> >> +	if (id < 0)
>> >> >> +		return id;
>> >> >> +
>> >> >> +	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
>> >> >> +	if (!linecard_bdev) {
>> >> >> +		mlxsw_linecard_bdev_id_free(id);
>> >> >> +		return -ENOMEM;
>> >> >> +	}
>> >> >> +	linecard_bdev->adev.id = id;
>> >> >> +	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
>> >> >> +	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
>> >> >> +	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
>> >> >> +	linecard_bdev->linecard = linecard;
>> >> >> +
>> >> >> +	err = auxiliary_device_init(&linecard_bdev->adev);
>> >> >> +	if (err) {
>> >> >> +		mlxsw_linecard_bdev_id_free(id);
>> >> >> +		kfree(linecard_bdev);
>> >> >> +		return err;
>> >> >> +	}
>> >> >> +
>> >> >> +	err = auxiliary_device_add(&linecard_bdev->adev);
>> >> >> +	if (err) {
>> >> >> +		auxiliary_device_uninit(&linecard_bdev->adev);
>> >> >> +		return err;
>> >> >> +	}
>> >> >> +
>> >> >> +	linecard->bdev = linecard_bdev;
>> >> >> +	return 0;
>> >> >> +}
>> >> >
>> >> >[...]
>> >> >
>> >> >> +static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
>> >> >> +				     const struct auxiliary_device_id *id)
>> >> >> +{
>> >> >> +	struct mlxsw_linecard_bdev *linecard_bdev =
>> >> >> +			container_of(adev, struct mlxsw_linecard_bdev, adev);
>> >> >> +	struct mlxsw_linecard_dev *linecard_dev;
>> >> >> +	struct devlink *devlink;
>> >> >> +
>> >> >> +	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
>> >> >> +				sizeof(*linecard_dev), &adev->dev);
>> >> >> +	if (!devlink)
>> >> >> +		return -ENOMEM;
>> >> >> +	linecard_dev = devlink_priv(devlink);
>> >> >> +	linecard_dev->linecard = linecard_bdev->linecard;
>> >> >> +	linecard_bdev->linecard_dev = linecard_dev;
>> >> >> +
>> >> >> +	devlink_register(devlink);
>> >> >> +	return 0;
>> >> >> +}
>> >> >
>> >> >[...]
>> >> >
>> >> >> @@ -252,6 +253,14 @@ mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
>> >> >>  	linecard->provisioned = true;
>> >> >>  	linecard->hw_revision = hw_revision;
>> >> >>  	linecard->ini_version = ini_version;
>> >> >> +
>> >> >> +	err = mlxsw_linecard_bdev_add(linecard);
>> >> >
>> >> >If a line card is already provisioned and we are reloading the primary
>> >> >devlink instance, isn't this going to deadlock on the global (not
>> >> >per-instance) devlink mutex? It is held throughout the reload operation
>> >> >and also taken in devlink_register()
>> >> >
>> >> >My understanding of the auxiliary bus model is that after adding a
>> >> >device to the bus via auxiliary_device_add(), the probe() function of
>> >> >the auxiliary driver will be called. In our case, this function acquires
>> >> >the global devlink mutex in devlink_register().
>> >> 
>> >> No, the line card auxdev is supposed to be removed during
>> >> linecard_fini(). This, I forgot to add, will do in v2.
>> >
>> >mlxsw_linecard_fini() is called as part of reload with the global
>> >devlink mutex held. The removal of the auxdev should prompt the
>> >unregistration of its devlink instance which also takes this mutex. If
>> >this doesn't deadlock, then I'm probably missing something.
>> 
>> You don't miss anything, it really does. Need to remove devlink_mutex
>> first.
>
>Can you please send it separately? Will probably need thorough review
>and testing...

Sure.


>
>The comment above devlink_mutex is: "An overall lock guarding every
>operation coming from userspace. It also guards devlink devices list and
>it is taken when driver registers/unregisters it.", but devlink does not
>have "parallel_ops" enabled, so maybe it's enough to only use this lock
>to protect the devlink devices list?

I'm preparing a patchset that will answer all your questions, lets move
the discussion there.


>
>> 
>> 
>> >
>> >Can you test reload with lockdep when line cards are already
>> >provisioned/active?
>> >
>> >> 
>> >> 
>> >> >
>> >> >> +	if (err) {
>> >> >> +		linecard->provisioned = false;
>> >> >> +		mlxsw_linecard_provision_fail(linecard);
>> >> >> +		return err;
>> >> >> +	}
>> >> >> +
>> >> >>  	devlink_linecard_provision_set(linecard->devlink_linecard, type);
>> >> >>  	return 0;
>> >> >>  }
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 4683312861ac..a510bf2cff2f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -7,6 +7,7 @@  config MLXSW_CORE
 	tristate "Mellanox Technologies Switch ASICs support"
 	select NET_DEVLINK
 	select MLXFW
+	select AUXILIARY_BUS
 	help
 	  This driver supports Mellanox Technologies Switch ASICs family.
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 1a465fd5d8b3..c9a773d3158b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -2,7 +2,7 @@ 
 obj-$(CONFIG_MLXSW_CORE)	+= mlxsw_core.o
 mlxsw_core-objs			:= core.o core_acl_flex_keys.o \
 				   core_acl_flex_actions.o core_env.o \
-				   core_linecards.o
+				   core_linecards.o core_linecard_dev.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
 obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index fc52832241b3..8864533281bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -3331,9 +3331,15 @@  static int __init mlxsw_core_module_init(void)
 {
 	int err;
 
+	err = mlxsw_linecard_driver_register();
+	if (err)
+		return err;
+
 	mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0);
-	if (!mlxsw_wq)
-		return -ENOMEM;
+	if (!mlxsw_wq) {
+		err = -ENOMEM;
+		goto err_alloc_workqueue;
+	}
 	mlxsw_owq = alloc_ordered_workqueue("%s_ordered", 0,
 					    mlxsw_core_driver_name);
 	if (!mlxsw_owq) {
@@ -3344,6 +3350,8 @@  static int __init mlxsw_core_module_init(void)
 
 err_alloc_ordered_workqueue:
 	destroy_workqueue(mlxsw_wq);
+err_alloc_workqueue:
+	mlxsw_linecard_driver_unregister();
 	return err;
 }
 
@@ -3351,6 +3359,7 @@  static void __exit mlxsw_core_module_exit(void)
 {
 	destroy_workqueue(mlxsw_owq);
 	destroy_workqueue(mlxsw_wq);
+	mlxsw_linecard_driver_unregister();
 }
 
 module_init(mlxsw_core_module_init);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index c2a891287047..cda20a4fcbdb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -12,6 +12,7 @@ 
 #include <linux/skbuff.h>
 #include <linux/workqueue.h>
 #include <linux/net_namespace.h>
+#include <linux/auxiliary_bus.h>
 #include <net/devlink.h>
 
 #include "trap.h"
@@ -567,6 +568,8 @@  enum mlxsw_linecard_status_event_type {
 	MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION,
 };
 
+struct mlxsw_linecard_bdev;
+
 struct mlxsw_linecard {
 	u8 slot_index;
 	struct mlxsw_linecards *linecards;
@@ -581,6 +584,7 @@  struct mlxsw_linecard {
 	   active:1;
 	u16 hw_revision;
 	u16 ini_version;
+	struct mlxsw_linecard_bdev *bdev;
 };
 
 struct mlxsw_linecard_types_info;
@@ -620,4 +624,10 @@  void mlxsw_linecards_event_ops_unregister(struct mlxsw_core *mlxsw_core,
 					  struct mlxsw_linecards_event_ops *ops,
 					  void *priv);
 
+int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard);
+void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard);
+
+int mlxsw_linecard_driver_register(void);
+void mlxsw_linecard_driver_unregister(void);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
new file mode 100644
index 000000000000..af70d3f7a177
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecard_dev.c
@@ -0,0 +1,152 @@ 
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2022 NVIDIA Corporation and Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/idr.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <net/devlink.h>
+#include "core.h"
+
+#define MLXSW_LINECARD_DEV_ID_NAME "lc"
+
+struct mlxsw_linecard_dev {
+	struct mlxsw_linecard *linecard;
+};
+
+struct mlxsw_linecard_bdev {
+	struct auxiliary_device adev;
+	struct mlxsw_linecard *linecard;
+	struct mlxsw_linecard_dev *linecard_dev;
+};
+
+static DEFINE_IDA(mlxsw_linecard_bdev_ida);
+
+static int mlxsw_linecard_bdev_id_alloc(void)
+{
+	return ida_alloc(&mlxsw_linecard_bdev_ida, GFP_KERNEL);
+}
+
+static void mlxsw_linecard_bdev_id_free(int id)
+{
+	ida_free(&mlxsw_linecard_bdev_ida, id);
+}
+
+static void mlxsw_linecard_bdev_release(struct device *device)
+{
+	struct auxiliary_device *adev =
+			container_of(device, struct auxiliary_device, dev);
+	struct mlxsw_linecard_bdev *linecard_bdev =
+			container_of(adev, struct mlxsw_linecard_bdev, adev);
+
+	mlxsw_linecard_bdev_id_free(adev->id);
+	kfree(linecard_bdev);
+}
+
+int mlxsw_linecard_bdev_add(struct mlxsw_linecard *linecard)
+{
+	struct mlxsw_linecard_bdev *linecard_bdev;
+	int err;
+	int id;
+
+	id = mlxsw_linecard_bdev_id_alloc();
+	if (id < 0)
+		return id;
+
+	linecard_bdev = kzalloc(sizeof(*linecard_bdev), GFP_KERNEL);
+	if (!linecard_bdev) {
+		mlxsw_linecard_bdev_id_free(id);
+		return -ENOMEM;
+	}
+	linecard_bdev->adev.id = id;
+	linecard_bdev->adev.name = MLXSW_LINECARD_DEV_ID_NAME;
+	linecard_bdev->adev.dev.release = mlxsw_linecard_bdev_release;
+	linecard_bdev->adev.dev.parent = linecard->linecards->bus_info->dev;
+	linecard_bdev->linecard = linecard;
+
+	err = auxiliary_device_init(&linecard_bdev->adev);
+	if (err) {
+		mlxsw_linecard_bdev_id_free(id);
+		kfree(linecard_bdev);
+		return err;
+	}
+
+	err = auxiliary_device_add(&linecard_bdev->adev);
+	if (err) {
+		auxiliary_device_uninit(&linecard_bdev->adev);
+		return err;
+	}
+
+	linecard->bdev = linecard_bdev;
+	return 0;
+}
+
+void mlxsw_linecard_bdev_del(struct mlxsw_linecard *linecard)
+{
+	struct mlxsw_linecard_bdev *linecard_bdev = linecard->bdev;
+
+	auxiliary_device_delete(&linecard_bdev->adev);
+	auxiliary_device_uninit(&linecard_bdev->adev);
+}
+
+static const struct devlink_ops mlxsw_linecard_dev_devlink_ops = {
+};
+
+static int mlxsw_linecard_bdev_probe(struct auxiliary_device *adev,
+				     const struct auxiliary_device_id *id)
+{
+	struct mlxsw_linecard_bdev *linecard_bdev =
+			container_of(adev, struct mlxsw_linecard_bdev, adev);
+	struct mlxsw_linecard_dev *linecard_dev;
+	struct devlink *devlink;
+
+	devlink = devlink_alloc(&mlxsw_linecard_dev_devlink_ops,
+				sizeof(*linecard_dev), &adev->dev);
+	if (!devlink)
+		return -ENOMEM;
+	linecard_dev = devlink_priv(devlink);
+	linecard_dev->linecard = linecard_bdev->linecard;
+	linecard_bdev->linecard_dev = linecard_dev;
+
+	devlink_register(devlink);
+	return 0;
+}
+
+static void mlxsw_linecard_bdev_remove(struct auxiliary_device *adev)
+{
+	struct mlxsw_linecard_bdev *linecard_bdev =
+			container_of(adev, struct mlxsw_linecard_bdev, adev);
+	struct devlink *devlink = priv_to_devlink(linecard_bdev->linecard_dev);
+
+	devlink_unregister(devlink);
+	devlink_free(devlink);
+}
+
+static const struct auxiliary_device_id mlxsw_linecard_bdev_id_table[] = {
+	{ .name = KBUILD_MODNAME "." MLXSW_LINECARD_DEV_ID_NAME },
+	{},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlxsw_linecard_bdev_id_table);
+
+static struct auxiliary_driver mlxsw_linecard_driver = {
+	.name = MLXSW_LINECARD_DEV_ID_NAME,
+	.probe = mlxsw_linecard_bdev_probe,
+	.remove = mlxsw_linecard_bdev_remove,
+	.id_table = mlxsw_linecard_bdev_id_table,
+};
+
+int mlxsw_linecard_driver_register(void)
+{
+	return auxiliary_driver_register(&mlxsw_linecard_driver);
+}
+
+void mlxsw_linecard_driver_unregister(void)
+{
+	auxiliary_driver_unregister(&mlxsw_linecard_driver);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
index 5c9869dcf674..ae51944cde0c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_linecards.c
@@ -232,6 +232,7 @@  mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
 {
 	struct mlxsw_linecards *linecards = linecard->linecards;
 	const char *type;
+	int err;
 
 	type = mlxsw_linecard_types_lookup(linecards, card_type);
 	mlxsw_linecard_status_event_done(linecard,
@@ -252,6 +253,14 @@  mlxsw_linecard_provision_set(struct mlxsw_linecard *linecard, u8 card_type,
 	linecard->provisioned = true;
 	linecard->hw_revision = hw_revision;
 	linecard->ini_version = ini_version;
+
+	err = mlxsw_linecard_bdev_add(linecard);
+	if (err) {
+		linecard->provisioned = false;
+		mlxsw_linecard_provision_fail(linecard);
+		return err;
+	}
+
 	devlink_linecard_provision_set(linecard->devlink_linecard, type);
 	return 0;
 }
@@ -260,6 +269,7 @@  static void mlxsw_linecard_provision_clear(struct mlxsw_linecard *linecard)
 {
 	mlxsw_linecard_status_event_done(linecard,
 					 MLXSW_LINECARD_STATUS_EVENT_TYPE_UNPROVISION);
+	mlxsw_linecard_bdev_del(linecard);
 	linecard->provisioned = false;
 	devlink_linecard_provision_clear(linecard->devlink_linecard);
 }