diff mbox series

[net] devlink: Hold devlink lock on health reporter dump get

Message ID 1696173580-222744-1-git-send-email-moshe@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] devlink: Hold devlink lock on health reporter dump get | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers warning 2 maintainers not CCed: edumazet@google.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 96 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Moshe Shemesh Oct. 1, 2023, 3:19 p.m. UTC
Devlink health dump get callback should take devlink lock as any other
devlink callback. Add devlink lock to the callback and to any call for
devlink_health_do_dump().

As devlink lock is added to any callback of dump, the reporter dump_lock
is now redundant and can be removed.

Fixes: d3efc2a6a6d8 ("net: devlink: remove devlink_mutex")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 net/devlink/health.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

Comments

Simon Horman Oct. 3, 2023, 11:36 a.m. UTC | #1
On Sun, Oct 01, 2023 at 06:19:40PM +0300, Moshe Shemesh wrote:
> Devlink health dump get callback should take devlink lock as any other
> devlink callback. Add devlink lock to the callback and to any call for
> devlink_health_do_dump().
> 
> As devlink lock is added to any callback of dump, the reporter dump_lock
> is now redundant and can be removed.
> 
> Fixes: d3efc2a6a6d8 ("net: devlink: remove devlink_mutex")
> Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
> Reviewed-by: Jiri Pirko <jiri@nvidia.com>

Reviewed-by: Simon Horman <horms@kernel.org>
Jakub Kicinski Oct. 4, 2023, 11:39 p.m. UTC | #2
On Sun, 1 Oct 2023 18:19:40 +0300 Moshe Shemesh wrote:
> Devlink health dump get callback should take devlink lock as any other
> devlink callback. Add devlink lock to the callback and to any call for
> devlink_health_do_dump().
> 
> As devlink lock is added to any callback of dump, the reporter dump_lock
> is now redundant and can be removed.

I love the change but it needs more info in the commit message :)

 1. what exact / example but real problem are you solving?
 2. some words of reassurance that you checked all the drivers
    and the locking change should be safe (none of them take
    instance locks in reporter callbacks etc)
Moshe Shemesh Oct. 5, 2023, 11:59 a.m. UTC | #3
On 10/5/2023 2:39 AM, Jakub Kicinski wrote:
> External email: Use caution opening links or attachments
> 
> 
> On Sun, 1 Oct 2023 18:19:40 +0300 Moshe Shemesh wrote:
>> Devlink health dump get callback should take devlink lock as any other
>> devlink callback. Add devlink lock to the callback and to any call for
>> devlink_health_do_dump().
>>
>> As devlink lock is added to any callback of dump, the reporter dump_lock
>> is now redundant and can be removed.
> 
> I love the change but it needs more info in the commit message :)
> 
>   1. what exact / example but real problem are you solving?

Sure, will send v2

>   2. some words of reassurance that you checked all the drivers
>      and the locking change should be safe (none of them take
>      instance locks in reporter callbacks etc)

That's what I checked, looking at the code, I didn't find any devlink 
instance lock through all drivers health dump callbacks in current code: 
bnxt_fw_dump(), hinic_hw_reporter_dump(), hinic_fw_reporter_dump(), 
rvu_hw_nix_intr_dump() , rvu_hw_nix_gen_dump(), rvu_hw_nix_err_dump(), 
rvu_hw_nix_ras_dump(), rvu_hw_npa_intr_dump(), rvu_hw_npa_gen_dump(), 
rvu_hw_npa_err_dump(), rvu_hw_npa_ras_dump(), 
mlxsw_core_health_fw_fatal_dump(), mlx5e_rx_reporter_dump(), 
mlx5e_tx_reporter_dump(), mlx5_fw_reporter_dump(), 
mlx5_fw_fatal_reporter_dump(), qed_fw_fatal_reporter_dump(), 
nsim_dev_empty_reporter_dump(), nsim_dev_dummy_reporter_dump(), 
qlge_reporter_coredump()


> --
> pw-bot: cr
diff mbox series

Patch

diff --git a/net/devlink/health.c b/net/devlink/health.c
index 638cad8d5c65..51e6e81e31bb 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -58,7 +58,6 @@  struct devlink_health_reporter {
 	struct devlink *devlink;
 	struct devlink_port *devlink_port;
 	struct devlink_fmsg *dump_fmsg;
-	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
 	u64 graceful_period;
 	bool auto_recover;
 	bool auto_dump;
@@ -125,7 +124,6 @@  __devlink_health_reporter_create(struct devlink *devlink,
 	reporter->graceful_period = graceful_period;
 	reporter->auto_recover = !!ops->recover;
 	reporter->auto_dump = !!ops->dump;
-	mutex_init(&reporter->dump_lock);
 	return reporter;
 }
 
@@ -226,7 +224,6 @@  EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
 static void
 devlink_health_reporter_free(struct devlink_health_reporter *reporter)
 {
-	mutex_destroy(&reporter->dump_lock);
 	if (reporter->dump_fmsg)
 		devlink_fmsg_free(reporter->dump_fmsg);
 	kfree(reporter);
@@ -625,10 +622,10 @@  int devlink_health_report(struct devlink_health_reporter *reporter,
 	}
 
 	if (reporter->auto_dump) {
-		mutex_lock(&reporter->dump_lock);
+		devl_lock(devlink);
 		/* store current dump of current error, for later analysis */
 		devlink_health_do_dump(reporter, priv_ctx, NULL);
-		mutex_unlock(&reporter->dump_lock);
+		devl_unlock(devlink);
 	}
 
 	if (!reporter->auto_recover)
@@ -1262,7 +1259,7 @@  int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
 }
 
 static struct devlink_health_reporter *
-devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
 {
 	const struct genl_info *info = genl_info_dump(cb);
 	struct devlink_health_reporter *reporter;
@@ -1272,10 +1269,12 @@  devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
 	devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
 	if (IS_ERR(devlink))
 		return NULL;
-	devl_unlock(devlink);
 
 	reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
-	devlink_put(devlink);
+	if (!reporter) {
+		devl_unlock(devlink);
+		devlink_put(devlink);
+	}
 	return reporter;
 }
 
@@ -1284,16 +1283,20 @@  int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
 {
 	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
 	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
 	int err;
 
-	reporter = devlink_health_reporter_get_from_cb(cb);
+	reporter = devlink_health_reporter_get_from_cb_lock(cb);
 	if (!reporter)
 		return -EINVAL;
 
-	if (!reporter->ops->dump)
+	devlink = reporter->devlink;
+	if (!reporter->ops->dump) {
+		devl_unlock(devlink);
+		devlink_put(devlink);
 		return -EOPNOTSUPP;
+	}
 
-	mutex_lock(&reporter->dump_lock);
 	if (!state->idx) {
 		err = devlink_health_do_dump(reporter, NULL, cb->extack);
 		if (err)
@@ -1309,7 +1312,8 @@  int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
 	err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
 				  DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
 unlock:
-	mutex_unlock(&reporter->dump_lock);
+	devl_unlock(devlink);
+	devlink_put(devlink);
 	return err;
 }
 
@@ -1326,9 +1330,7 @@  int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
 	if (!reporter->ops->dump)
 		return -EOPNOTSUPP;
 
-	mutex_lock(&reporter->dump_lock);
 	devlink_health_dump_clear(reporter);
-	mutex_unlock(&reporter->dump_lock);
 	return 0;
 }