diff mbox series

[net] net/mlx5e: limit log messages due to (ovs) probing to _once

Message ID d57b95462cccf0f67089c91d3dfd3d1f4c46e9bf.1585872570.git.marcelo.leitner@gmail.com (mailing list archive)
State Not Applicable
Headers show
Series [net] net/mlx5e: limit log messages due to (ovs) probing to _once | expand

Commit Message

Marcelo Ricardo Leitner April 3, 2020, 12:11 a.m. UTC
OVS will keep adding such flows, no matter what. They will usually be
handled by tc software (or ovs datapath, if skip_sw is used). But the
driver is logging these messages for each and every attempt, despite the
extack. Note that they weren't rate limited, and a broadcast storm could
trigger system console flooding.

Switch these to be _once. It's enough to tell the sysadmin what is
happenning, and if anything, the OVS log will have all the errors.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 61 ++++++++++---------
 1 file changed, 32 insertions(+), 29 deletions(-)

Comments

Saeed Mahameed April 3, 2020, 2:27 a.m. UTC | #1
On Thu, 2020-04-02 at 21:11 -0300, Marcelo Ricardo Leitner wrote:
> OVS will keep adding such flows, no matter what. They will usually be
> handled by tc software (or ovs datapath, if skip_sw is used). But the
> driver is logging these messages for each and every attempt, despite
> the
> extack. Note that they weren't rate limited, and a broadcast storm
> could
> trigger system console flooding.
> 
> Switch these to be _once. It's enough to tell the sysadmin what is
> happenning, and if anything, the OVS log will have all the errors.
> 

++ mlnx TC stake holders 

The fact that for all of the suppressed messages we will still have NL
extack reporting, makes it easier for me to agree with this patch. but
there is a loss of information since now we will stop printing the
attribute/params which caused the failure in most of the cases, and it
will be harder for the user and the developer to understand why these
attributes are not working .. 

I understand it is for debug only but i strongly suggest to not totally
suppress these messages and maybe just move them to tracepoints buffer
? for those who would want to really debug .. 

we already have some tracepoints implemented for en_tc.c 
mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a tracepoint
for error reporting .. 

> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

net patches must have a "Fixes:" tag

> ---
>  .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 61 ++++++++++-------
> --
>  1 file changed, 32 insertions(+), 29 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> index
> 438128dde187d7ec58892c2879c6037f807f576f..1182fba3edbb8cf7bd59557b7ec
> e18765c704186 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> @@ -1828,8 +1828,8 @@ enc_opts_is_dont_care_or_full_match(struct
> mlx5e_priv *priv,
>  				       opt->length * 4)) {
>  				NL_SET_ERR_MSG(extack,
>  					       "Partial match of tunnel
> options in chain > 0 isn't supported");
> -				netdev_warn(priv->netdev,
> -					    "Partial match of tunnel
> options in chain > 0 isn't supported");
> +				netdev_warn_once(priv->netdev,
> +						 "Partial match of
> tunnel options in chain > 0 isn't supported");
>  				return -EOPNOTSUPP;
>  			}
>  		}
> @@ -1988,8 +1988,8 @@ static int parse_tunnel_attr(struct mlx5e_priv
> *priv,
>  	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
>  		NL_SET_ERR_MSG(extack,
>  			       "Chains on tunnel devices isn't
> supported without register loopback support");
> -		netdev_warn(priv->netdev,
> -			    "Chains on tunnel devices isn't supported
> without register loopback support");
> +		netdev_warn_once(priv->netdev,
> +				 "Chains on tunnel devices isn't
> supported without register loopback support");
>  		return -EOPNOTSUPP;
>  	}
>  
> @@ -2133,8 +2133,8 @@ static int __parse_cls_flower(struct mlx5e_priv
> *priv,
>  	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
>  	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
>  		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
> -		netdev_warn(priv->netdev, "Unsupported key used:
> 0x%x\n",
> -			    dissector->used_keys);
> +		netdev_warn_once(priv->netdev, "Unsupported key used:
> 0x%x\n",
> +				 dissector->used_keys);
>  		return -EOPNOTSUPP;
>  	}
>  
> @@ -2484,8 +2484,8 @@ static int parse_cls_flower(struct mlx5e_priv
> *priv,
>  		    esw->offloads.inline_mode <
> non_tunnel_match_level)) {
>  			NL_SET_ERR_MSG_MOD(extack,
>  					   "Flow is not offloaded due
> to min inline setting");
> -			netdev_warn(priv->netdev,
> -				    "Flow is not offloaded due to min
> inline setting, required %d actual %d\n",
> +			netdev_warn_once(priv->netdev,
> +					 "Flow is not offloaded due to
> min inline setting, required %d actual %d\n",
>  				    non_tunnel_match_level, esw-
> >offloads.inline_mode);
>  			return -EOPNOTSUPP;
>  		}
> @@ -2885,7 +2885,9 @@ static int alloc_tc_pedit_action(struct
> mlx5e_priv *priv, int namespace,
>  		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks)))
> {
>  			NL_SET_ERR_MSG_MOD(extack,
>  					   "attempt to offload an
> unsupported field");
> -			netdev_warn(priv->netdev, "attempt to offload
> an unsupported field (cmd %d)\n", cmd);
> +			netdev_warn_once(priv->netdev,
> +					 "attempt to offload an
> unsupported field (cmd %d)\n",
> +					 cmd);
>  			print_hex_dump(KERN_WARNING, "mask: ",
> DUMP_PREFIX_ADDRESS,
>  				       16, 1, cmd_masks,
> sizeof(zero_masks), true);
>  			err = -EOPNOTSUPP;
> @@ -2912,17 +2914,17 @@ static bool csum_offload_supported(struct
> mlx5e_priv *priv,
>  	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
>  		NL_SET_ERR_MSG_MOD(extack,
>  				   "TC csum action is only offloaded
> with pedit");
> -		netdev_warn(priv->netdev,
> -			    "TC csum action is only offloaded with
> pedit\n");
> +		netdev_warn_once(priv->netdev,
> +				 "TC csum action is only offloaded with
> pedit\n");
>  		return false;
>  	}
>  
>  	if (update_flags & ~prot_flags) {
>  		NL_SET_ERR_MSG_MOD(extack,
>  				   "can't offload TC csum action for
> some header/s");
> -		netdev_warn(priv->netdev,
> -			    "can't offload TC csum action for some
> header/s - flags %#x\n",
> -			    update_flags);
> +		netdev_warn_once(priv->netdev,
> +				 "can't offload TC csum action for some
> header/s - flags %#x\n",
> +				 update_flags);
>  		return false;
>  	}
>  
> @@ -3224,8 +3226,9 @@ static int parse_tc_nic_actions(struct
> mlx5e_priv *priv,
>  			} else {
>  				NL_SET_ERR_MSG_MOD(extack,
>  						   "device is not on
> same HW, can't offload");
> -				netdev_warn(priv->netdev, "device %s
> not on same HW, can't offload\n",
> -					    peer_dev->name);
> +				netdev_warn_once(priv->netdev,
> +						 "device %s not on same
> HW, can't offload\n",
> +						 peer_dev->name);
>  				return -EINVAL;
>  			}
>  			}
> @@ -3754,9 +3757,9 @@ static int parse_tc_fdb_actions(struct
> mlx5e_priv *priv,
>  			if (attr->out_count >=
> MLX5_MAX_FLOW_FWD_VPORTS) {
>  				NL_SET_ERR_MSG_MOD(extack,
>  						   "can't support more
> output ports, can't offload forwarding");
> -				netdev_warn(priv->netdev,
> -					    "can't support more than %d
> output ports, can't offload forwarding\n",
> -					    attr->out_count);
> +				netdev_warn_once(priv->netdev,
> +						 "can't support more
> than %d output ports, can't offload forwarding\n",
> +						 attr->out_count);
>  				return -EOPNOTSUPP;
>  			}
>  
> @@ -3821,10 +3824,10 @@ static int parse_tc_fdb_actions(struct
> mlx5e_priv *priv,
>  				if
> (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
>  					NL_SET_ERR_MSG_MOD(extack,
>  							   "devices are
> not on same switch HW, can't offload forwarding");
> -					netdev_warn(priv->netdev,
> -						    "devices %s %s not
> on same switch HW, can't offload forwarding\n",
> -						    priv->netdev->name,
> -						    out_dev->name);
> +					netdev_warn_once(priv->netdev,
> +							 "devices %s %s
> not on same switch HW, can't offload forwarding\n",
> +							 priv->netdev-
> >name,
> +							 out_dev-
> >name);
>  					return -EOPNOTSUPP;
>  				}
>  
> @@ -3843,10 +3846,10 @@ static int parse_tc_fdb_actions(struct
> mlx5e_priv *priv,
>  			} else {
>  				NL_SET_ERR_MSG_MOD(extack,
>  						   "devices are not on
> same switch HW, can't offload forwarding");
> -				netdev_warn(priv->netdev,
> -					    "devices %s %s not on same
> switch HW, can't offload forwarding\n",
> -					    priv->netdev->name,
> -					    out_dev->name);
> +				netdev_warn_once(priv->netdev,
> +						 "devices %s %s not on
> same switch HW, can't offload forwarding\n",
> +						 priv->netdev->name,
> +						 out_dev->name);
>  				return -EINVAL;
>  			}
>  			}
> @@ -3959,8 +3962,8 @@ static int parse_tc_fdb_actions(struct
> mlx5e_priv *priv,
>  
>  			NL_SET_ERR_MSG(extack,
>  				       "Decap with goto isn't
> supported");
> -			netdev_warn(priv->netdev,
> -				    "Decap with goto isn't supported");
> +			netdev_warn_once(priv->netdev,
> +					 "Decap with goto isn't
> supported");
>  			return -EOPNOTSUPP;
>  		}
>
Marcelo Ricardo Leitner April 3, 2020, 2:48 a.m. UTC | #2
On Fri, Apr 03, 2020 at 02:27:08AM +0000, Saeed Mahameed wrote:
> On Thu, 2020-04-02 at 21:11 -0300, Marcelo Ricardo Leitner wrote:
> > OVS will keep adding such flows, no matter what. They will usually be
> > handled by tc software (or ovs datapath, if skip_sw is used). But the
> > driver is logging these messages for each and every attempt, despite
> > the
> > extack. Note that they weren't rate limited, and a broadcast storm
> > could
> > trigger system console flooding.
> > 
> > Switch these to be _once. It's enough to tell the sysadmin what is
> > happenning, and if anything, the OVS log will have all the errors.
> > 
> 
> ++ mlnx TC stake holders 
> 
> The fact that for all of the suppressed messages we will still have NL
> extack reporting, makes it easier for me to agree with this patch. but
> there is a loss of information since now we will stop printing the
> attribute/params which caused the failure in most of the cases, and it
> will be harder for the user and the developer to understand why these
> attributes are not working .. 

I see.

> 
> I understand it is for debug only but i strongly suggest to not totally
> suppress these messages and maybe just move them to tracepoints buffer
> ? for those who would want to really debug .. 
> 
> we already have some tracepoints implemented for en_tc.c 
> mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a tracepoint
> for error reporting .. 

That, or s/netdev_warn/netdev_dbg/, but both are more hidden to the
user than the _once.

> 
> > Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> 
> net patches must have a "Fixes:" tag

I know that it is strongly recommended, but there is nothing really
broken here. It's a small cleanup to code already there. Now I'm
confused, isn't that what net is meant for, and a Fixes tag would be
an abuse of it here?

> 
> > ---
> >  .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 61 ++++++++++-------
> > --
> >  1 file changed, 32 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > index
> > 438128dde187d7ec58892c2879c6037f807f576f..1182fba3edbb8cf7bd59557b7ec
> > e18765c704186 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > @@ -1828,8 +1828,8 @@ enc_opts_is_dont_care_or_full_match(struct
> > mlx5e_priv *priv,
> >  				       opt->length * 4)) {
> >  				NL_SET_ERR_MSG(extack,
> >  					       "Partial match of tunnel
> > options in chain > 0 isn't supported");
> > -				netdev_warn(priv->netdev,
> > -					    "Partial match of tunnel
> > options in chain > 0 isn't supported");
> > +				netdev_warn_once(priv->netdev,
> > +						 "Partial match of
> > tunnel options in chain > 0 isn't supported");
> >  				return -EOPNOTSUPP;
> >  			}
> >  		}
> > @@ -1988,8 +1988,8 @@ static int parse_tunnel_attr(struct mlx5e_priv
> > *priv,
> >  	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
> >  		NL_SET_ERR_MSG(extack,
> >  			       "Chains on tunnel devices isn't
> > supported without register loopback support");
> > -		netdev_warn(priv->netdev,
> > -			    "Chains on tunnel devices isn't supported
> > without register loopback support");
> > +		netdev_warn_once(priv->netdev,
> > +				 "Chains on tunnel devices isn't
> > supported without register loopback support");
> >  		return -EOPNOTSUPP;
> >  	}
> >  
> > @@ -2133,8 +2133,8 @@ static int __parse_cls_flower(struct mlx5e_priv
> > *priv,
> >  	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
> >  	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
> >  		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
> > -		netdev_warn(priv->netdev, "Unsupported key used:
> > 0x%x\n",
> > -			    dissector->used_keys);
> > +		netdev_warn_once(priv->netdev, "Unsupported key used:
> > 0x%x\n",
> > +				 dissector->used_keys);
> >  		return -EOPNOTSUPP;
> >  	}
> >  
> > @@ -2484,8 +2484,8 @@ static int parse_cls_flower(struct mlx5e_priv
> > *priv,
> >  		    esw->offloads.inline_mode <
> > non_tunnel_match_level)) {
> >  			NL_SET_ERR_MSG_MOD(extack,
> >  					   "Flow is not offloaded due
> > to min inline setting");
> > -			netdev_warn(priv->netdev,
> > -				    "Flow is not offloaded due to min
> > inline setting, required %d actual %d\n",
> > +			netdev_warn_once(priv->netdev,
> > +					 "Flow is not offloaded due to
> > min inline setting, required %d actual %d\n",
> >  				    non_tunnel_match_level, esw-
> > >offloads.inline_mode);
> >  			return -EOPNOTSUPP;
> >  		}
> > @@ -2885,7 +2885,9 @@ static int alloc_tc_pedit_action(struct
> > mlx5e_priv *priv, int namespace,
> >  		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks)))
> > {
> >  			NL_SET_ERR_MSG_MOD(extack,
> >  					   "attempt to offload an
> > unsupported field");
> > -			netdev_warn(priv->netdev, "attempt to offload
> > an unsupported field (cmd %d)\n", cmd);
> > +			netdev_warn_once(priv->netdev,
> > +					 "attempt to offload an
> > unsupported field (cmd %d)\n",
> > +					 cmd);
> >  			print_hex_dump(KERN_WARNING, "mask: ",
> > DUMP_PREFIX_ADDRESS,
> >  				       16, 1, cmd_masks,
> > sizeof(zero_masks), true);
> >  			err = -EOPNOTSUPP;
> > @@ -2912,17 +2914,17 @@ static bool csum_offload_supported(struct
> > mlx5e_priv *priv,
> >  	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
> >  		NL_SET_ERR_MSG_MOD(extack,
> >  				   "TC csum action is only offloaded
> > with pedit");
> > -		netdev_warn(priv->netdev,
> > -			    "TC csum action is only offloaded with
> > pedit\n");
> > +		netdev_warn_once(priv->netdev,
> > +				 "TC csum action is only offloaded with
> > pedit\n");
> >  		return false;
> >  	}
> >  
> >  	if (update_flags & ~prot_flags) {
> >  		NL_SET_ERR_MSG_MOD(extack,
> >  				   "can't offload TC csum action for
> > some header/s");
> > -		netdev_warn(priv->netdev,
> > -			    "can't offload TC csum action for some
> > header/s - flags %#x\n",
> > -			    update_flags);
> > +		netdev_warn_once(priv->netdev,
> > +				 "can't offload TC csum action for some
> > header/s - flags %#x\n",
> > +				 update_flags);
> >  		return false;
> >  	}
> >  
> > @@ -3224,8 +3226,9 @@ static int parse_tc_nic_actions(struct
> > mlx5e_priv *priv,
> >  			} else {
> >  				NL_SET_ERR_MSG_MOD(extack,
> >  						   "device is not on
> > same HW, can't offload");
> > -				netdev_warn(priv->netdev, "device %s
> > not on same HW, can't offload\n",
> > -					    peer_dev->name);
> > +				netdev_warn_once(priv->netdev,
> > +						 "device %s not on same
> > HW, can't offload\n",
> > +						 peer_dev->name);
> >  				return -EINVAL;
> >  			}
> >  			}
> > @@ -3754,9 +3757,9 @@ static int parse_tc_fdb_actions(struct
> > mlx5e_priv *priv,
> >  			if (attr->out_count >=
> > MLX5_MAX_FLOW_FWD_VPORTS) {
> >  				NL_SET_ERR_MSG_MOD(extack,
> >  						   "can't support more
> > output ports, can't offload forwarding");
> > -				netdev_warn(priv->netdev,
> > -					    "can't support more than %d
> > output ports, can't offload forwarding\n",
> > -					    attr->out_count);
> > +				netdev_warn_once(priv->netdev,
> > +						 "can't support more
> > than %d output ports, can't offload forwarding\n",
> > +						 attr->out_count);
> >  				return -EOPNOTSUPP;
> >  			}
> >  
> > @@ -3821,10 +3824,10 @@ static int parse_tc_fdb_actions(struct
> > mlx5e_priv *priv,
> >  				if
> > (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
> >  					NL_SET_ERR_MSG_MOD(extack,
> >  							   "devices are
> > not on same switch HW, can't offload forwarding");
> > -					netdev_warn(priv->netdev,
> > -						    "devices %s %s not
> > on same switch HW, can't offload forwarding\n",
> > -						    priv->netdev->name,
> > -						    out_dev->name);
> > +					netdev_warn_once(priv->netdev,
> > +							 "devices %s %s
> > not on same switch HW, can't offload forwarding\n",
> > +							 priv->netdev-
> > >name,
> > +							 out_dev-
> > >name);
> >  					return -EOPNOTSUPP;
> >  				}
> >  
> > @@ -3843,10 +3846,10 @@ static int parse_tc_fdb_actions(struct
> > mlx5e_priv *priv,
> >  			} else {
> >  				NL_SET_ERR_MSG_MOD(extack,
> >  						   "devices are not on
> > same switch HW, can't offload forwarding");
> > -				netdev_warn(priv->netdev,
> > -					    "devices %s %s not on same
> > switch HW, can't offload forwarding\n",
> > -					    priv->netdev->name,
> > -					    out_dev->name);
> > +				netdev_warn_once(priv->netdev,
> > +						 "devices %s %s not on
> > same switch HW, can't offload forwarding\n",
> > +						 priv->netdev->name,
> > +						 out_dev->name);
> >  				return -EINVAL;
> >  			}
> >  			}
> > @@ -3959,8 +3962,8 @@ static int parse_tc_fdb_actions(struct
> > mlx5e_priv *priv,
> >  
> >  			NL_SET_ERR_MSG(extack,
> >  				       "Decap with goto isn't
> > supported");
> > -			netdev_warn(priv->netdev,
> > -				    "Decap with goto isn't supported");
> > +			netdev_warn_once(priv->netdev,
> > +					 "Decap with goto isn't
> > supported");
> >  			return -EOPNOTSUPP;
> >  		}
> >
Saeed Mahameed April 8, 2020, 7:51 p.m. UTC | #3
On Thu, 2020-04-02 at 23:48 -0300, marcelo.leitner@gmail.com wrote:
> On Fri, Apr 03, 2020 at 02:27:08AM +0000, Saeed Mahameed wrote:
> > On Thu, 2020-04-02 at 21:11 -0300, Marcelo Ricardo Leitner wrote:
> > > OVS will keep adding such flows, no matter what. They will
> > > usually be
> > > handled by tc software (or ovs datapath, if skip_sw is used). But
> > > the
> > > driver is logging these messages for each and every attempt,
> > > despite
> > > the
> > > extack. Note that they weren't rate limited, and a broadcast
> > > storm
> > > could
> > > trigger system console flooding.
> > > 
> > > Switch these to be _once. It's enough to tell the sysadmin what
> > > is
> > > happenning, and if anything, the OVS log will have all the
> > > errors.
> > > 
> > 
> > ++ mlnx TC stake holders 
> > 
> > The fact that for all of the suppressed messages we will still have
> > NL
> > extack reporting, makes it easier for me to agree with this patch.
> > but
> > there is a loss of information since now we will stop printing the
> > attribute/params which caused the failure in most of the cases, and
> > it
> > will be harder for the user and the developer to understand why
> > these
> > attributes are not working .. 
> 
> I see.
> 
> > I understand it is for debug only but i strongly suggest to not
> > totally
> > suppress these messages and maybe just move them to tracepoints
> > buffer
> > ? for those who would want to really debug .. 
> > 
> > we already have some tracepoints implemented for en_tc.c 
> > mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a
> > tracepoint
> > for error reporting .. 
> 
> That, or s/netdev_warn/netdev_dbg/, but both are more hidden to the
> user than the _once.
> 

i don't see any reason to pollute kernel log with debug messages when
we have tracepoint buffer for en_tc .. 


> > > Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com
> > > >
> > 
> > net patches must have a "Fixes:" tag
> 
> I know that it is strongly recommended, but there is nothing really
> broken here. It's a small cleanup to code already there. Now I'm
> confused, isn't that what net is meant for, and a Fixes tag would be
> an abuse of it here?

net is meant for bug fixes, cleanups and "next-like" content should go
to net-next. so this should be considered as an improvement and should
be submitted to net-next.

> 
> > > ---
> > >  .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 61 ++++++++++---
> > > ----
> > > --
> > >  1 file changed, 32 insertions(+), 29 deletions(-)
> > > 
> > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > > b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > > index
> > > 438128dde187d7ec58892c2879c6037f807f576f..1182fba3edbb8cf7bd59557
> > > b7ec
> > > e18765c704186 100644
> > > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> > > @@ -1828,8 +1828,8 @@ enc_opts_is_dont_care_or_full_match(struct
> > > mlx5e_priv *priv,
> > >  				       opt->length * 4)) {
> > >  				NL_SET_ERR_MSG(extack,
> > >  					       "Partial match of tunnel
> > > options in chain > 0 isn't supported");
> > > -				netdev_warn(priv->netdev,
> > > -					    "Partial match of tunnel
> > > options in chain > 0 isn't supported");
> > > +				netdev_warn_once(priv->netdev,
> > > +						 "Partial match of
> > > tunnel options in chain > 0 isn't supported");
> > >  				return -EOPNOTSUPP;
> > >  			}
> > >  		}
> > > @@ -1988,8 +1988,8 @@ static int parse_tunnel_attr(struct
> > > mlx5e_priv
> > > *priv,
> > >  	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
> > >  		NL_SET_ERR_MSG(extack,
> > >  			       "Chains on tunnel devices isn't
> > > supported without register loopback support");
> > > -		netdev_warn(priv->netdev,
> > > -			    "Chains on tunnel devices isn't supported
> > > without register loopback support");
> > > +		netdev_warn_once(priv->netdev,
> > > +				 "Chains on tunnel devices isn't
> > > supported without register loopback support");
> > >  		return -EOPNOTSUPP;
> > >  	}
> > >  
> > > @@ -2133,8 +2133,8 @@ static int __parse_cls_flower(struct
> > > mlx5e_priv
> > > *priv,
> > >  	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
> > >  	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
> > >  		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
> > > -		netdev_warn(priv->netdev, "Unsupported key used:
> > > 0x%x\n",
> > > -			    dissector->used_keys);
> > > +		netdev_warn_once(priv->netdev, "Unsupported key used:
> > > 0x%x\n",
> > > +				 dissector->used_keys);
> > >  		return -EOPNOTSUPP;
> > >  	}
> > >  
> > > @@ -2484,8 +2484,8 @@ static int parse_cls_flower(struct
> > > mlx5e_priv
> > > *priv,
> > >  		    esw->offloads.inline_mode <
> > > non_tunnel_match_level)) {
> > >  			NL_SET_ERR_MSG_MOD(extack,
> > >  					   "Flow is not offloaded due
> > > to min inline setting");
> > > -			netdev_warn(priv->netdev,
> > > -				    "Flow is not offloaded due to min
> > > inline setting, required %d actual %d\n",
> > > +			netdev_warn_once(priv->netdev,
> > > +					 "Flow is not offloaded due to
> > > min inline setting, required %d actual %d\n",
> > >  				    non_tunnel_match_level, esw-
> > > > offloads.inline_mode);
> > >  			return -EOPNOTSUPP;
> > >  		}
> > > @@ -2885,7 +2885,9 @@ static int alloc_tc_pedit_action(struct
> > > mlx5e_priv *priv, int namespace,
> > >  		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks)))
> > > {
> > >  			NL_SET_ERR_MSG_MOD(extack,
> > >  					   "attempt to offload an
> > > unsupported field");
> > > -			netdev_warn(priv->netdev, "attempt to offload
> > > an unsupported field (cmd %d)\n", cmd);
> > > +			netdev_warn_once(priv->netdev,
> > > +					 "attempt to offload an
> > > unsupported field (cmd %d)\n",
> > > +					 cmd);
> > >  			print_hex_dump(KERN_WARNING, "mask: ",
> > > DUMP_PREFIX_ADDRESS,
> > >  				       16, 1, cmd_masks,
> > > sizeof(zero_masks), true);
> > >  			err = -EOPNOTSUPP;
> > > @@ -2912,17 +2914,17 @@ static bool csum_offload_supported(struct
> > > mlx5e_priv *priv,
> > >  	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
> > >  		NL_SET_ERR_MSG_MOD(extack,
> > >  				   "TC csum action is only offloaded
> > > with pedit");
> > > -		netdev_warn(priv->netdev,
> > > -			    "TC csum action is only offloaded with
> > > pedit\n");
> > > +		netdev_warn_once(priv->netdev,
> > > +				 "TC csum action is only offloaded with
> > > pedit\n");
> > >  		return false;
> > >  	}
> > >  
> > >  	if (update_flags & ~prot_flags) {
> > >  		NL_SET_ERR_MSG_MOD(extack,
> > >  				   "can't offload TC csum action for
> > > some header/s");
> > > -		netdev_warn(priv->netdev,
> > > -			    "can't offload TC csum action for some
> > > header/s - flags %#x\n",
> > > -			    update_flags);
> > > +		netdev_warn_once(priv->netdev,
> > > +				 "can't offload TC csum action for some
> > > header/s - flags %#x\n",
> > > +				 update_flags);
> > >  		return false;
> > >  	}
> > >  
> > > @@ -3224,8 +3226,9 @@ static int parse_tc_nic_actions(struct
> > > mlx5e_priv *priv,
> > >  			} else {
> > >  				NL_SET_ERR_MSG_MOD(extack,
> > >  						   "device is not on
> > > same HW, can't offload");
> > > -				netdev_warn(priv->netdev, "device %s
> > > not on same HW, can't offload\n",
> > > -					    peer_dev->name);
> > > +				netdev_warn_once(priv->netdev,
> > > +						 "device %s not on same
> > > HW, can't offload\n",
> > > +						 peer_dev->name);
> > >  				return -EINVAL;
> > >  			}
> > >  			}
> > > @@ -3754,9 +3757,9 @@ static int parse_tc_fdb_actions(struct
> > > mlx5e_priv *priv,
> > >  			if (attr->out_count >=
> > > MLX5_MAX_FLOW_FWD_VPORTS) {
> > >  				NL_SET_ERR_MSG_MOD(extack,
> > >  						   "can't support more
> > > output ports, can't offload forwarding");
> > > -				netdev_warn(priv->netdev,
> > > -					    "can't support more than %d
> > > output ports, can't offload forwarding\n",
> > > -					    attr->out_count);
> > > +				netdev_warn_once(priv->netdev,
> > > +						 "can't support more
> > > than %d output ports, can't offload forwarding\n",
> > > +						 attr->out_count);
> > >  				return -EOPNOTSUPP;
> > >  			}
> > >  
> > > @@ -3821,10 +3824,10 @@ static int parse_tc_fdb_actions(struct
> > > mlx5e_priv *priv,
> > >  				if
> > > (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
> > >  					NL_SET_ERR_MSG_MOD(extack,
> > >  							   "devices are
> > > not on same switch HW, can't offload forwarding");
> > > -					netdev_warn(priv->netdev,
> > > -						    "devices %s %s not
> > > on same switch HW, can't offload forwarding\n",
> > > -						    priv->netdev->name,
> > > -						    out_dev->name);
> > > +					netdev_warn_once(priv->netdev,
> > > +							 "devices %s %s
> > > not on same switch HW, can't offload forwarding\n",
> > > +							 priv->netdev-
> > > > name,
> > > +							 out_dev-
> > > > name);
> > >  					return -EOPNOTSUPP;
> > >  				}
> > >  
> > > @@ -3843,10 +3846,10 @@ static int parse_tc_fdb_actions(struct
> > > mlx5e_priv *priv,
> > >  			} else {
> > >  				NL_SET_ERR_MSG_MOD(extack,
> > >  						   "devices are not on
> > > same switch HW, can't offload forwarding");
> > > -				netdev_warn(priv->netdev,
> > > -					    "devices %s %s not on same
> > > switch HW, can't offload forwarding\n",
> > > -					    priv->netdev->name,
> > > -					    out_dev->name);
> > > +				netdev_warn_once(priv->netdev,
> > > +						 "devices %s %s not on
> > > same switch HW, can't offload forwarding\n",
> > > +						 priv->netdev->name,
> > > +						 out_dev->name);
> > >  				return -EINVAL;
> > >  			}
> > >  			}
> > > @@ -3959,8 +3962,8 @@ static int parse_tc_fdb_actions(struct
> > > mlx5e_priv *priv,
> > >  
> > >  			NL_SET_ERR_MSG(extack,
> > >  				       "Decap with goto isn't
> > > supported");
> > > -			netdev_warn(priv->netdev,
> > > -				    "Decap with goto isn't supported");
> > > +			netdev_warn_once(priv->netdev,
> > > +					 "Decap with goto isn't
> > > supported");
> > >  			return -EOPNOTSUPP;
> > >  		}
> > >
Marcelo Ricardo Leitner April 8, 2020, 9:54 p.m. UTC | #4
On Wed, Apr 08, 2020 at 07:51:22PM +0000, Saeed Mahameed wrote:
...
> > > I understand it is for debug only but i strongly suggest to not
> > > totally
> > > suppress these messages and maybe just move them to tracepoints
> > > buffer
> > > ? for those who would want to really debug .. 
> > > 
> > > we already have some tracepoints implemented for en_tc.c 
> > > mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a
> > > tracepoint
> > > for error reporting .. 
> > 
> > That, or s/netdev_warn/netdev_dbg/, but both are more hidden to the
> > user than the _once.
> > 
> 
> i don't see any reason to pollute kernel log with debug messages when
> we have tracepoint buffer for en_tc .. 

So we're agreeing that these need to be changed. Good.

I don't think a sysadmin would be using tracepoints for
troubleshooting this, but okay. My only objective here is exactly what
you said, to not pollute kernel log too much with these potentially
repetitive messages.
Saeed Mahameed April 8, 2020, 10:38 p.m. UTC | #5
On Wed, 2020-04-08 at 18:54 -0300, marcelo.leitner@gmail.com wrote:
> On Wed, Apr 08, 2020 at 07:51:22PM +0000, Saeed Mahameed wrote:
> ...
> > > > I understand it is for debug only but i strongly suggest to not
> > > > totally
> > > > suppress these messages and maybe just move them to tracepoints
> > > > buffer
> > > > ? for those who would want to really debug .. 
> > > > 
> > > > we already have some tracepoints implemented for en_tc.c 
> > > > mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a
> > > > tracepoint
> > > > for error reporting .. 
> > > 
> > > That, or s/netdev_warn/netdev_dbg/, but both are more hidden to
> > > the
> > > user than the _once.
> > > 
> > 
> > i don't see any reason to pollute kernel log with debug messages
> > when
> > we have tracepoint buffer for en_tc .. 
> 
> So we're agreeing that these need to be changed. Good.

I would like to wait for the feedback from the CC'ed mlnx TC
developers..

I just pinged them, lets see what they think.

but i totally agree, TC can support 100k offloads requests per seconds,
dumping every possible issue to the kernel log shouldn't be an option,this is not a boot or a fatal error/warning .. 

> 
> I don't think a sysadmin would be using tracepoints for
> troubleshooting this, but okay. My only objective here is exactly
> what
> you said, to not pollute kernel log too much with these potentially
> repetitive messages.

these types of errors are easily reproduce-able, a sysadmin can see and
report the errno and the extack message, and in case it is really
required, the support or development team can ask to turn on trace-
points or debug and reproduce ..
Marcelo Ricardo Leitner April 8, 2020, 10:42 p.m. UTC | #6
On Wed, Apr 08, 2020 at 10:38:52PM +0000, Saeed Mahameed wrote:
> On Wed, 2020-04-08 at 18:54 -0300, marcelo.leitner@gmail.com wrote:
> > On Wed, Apr 08, 2020 at 07:51:22PM +0000, Saeed Mahameed wrote:
> > ...
> > > > > I understand it is for debug only but i strongly suggest to not
> > > > > totally
> > > > > suppress these messages and maybe just move them to tracepoints
> > > > > buffer
> > > > > ? for those who would want to really debug .. 
> > > > > 
> > > > > we already have some tracepoints implemented for en_tc.c 
> > > > > mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a
> > > > > tracepoint
> > > > > for error reporting .. 
> > > > 
> > > > That, or s/netdev_warn/netdev_dbg/, but both are more hidden to
> > > > the
> > > > user than the _once.
> > > > 
> > > 
> > > i don't see any reason to pollute kernel log with debug messages
> > > when
> > > we have tracepoint buffer for en_tc .. 
> > 
> > So we're agreeing that these need to be changed. Good.
> 
> I would like to wait for the feedback from the CC'ed mlnx TC
> developers..
> 
> I just pinged them, lets see what they think.
> 
> but i totally agree, TC can support 100k offloads requests per seconds,
> dumping every possible issue to the kernel log shouldn't be an option,this is not a boot or a fatal error/warning .. 
> 
> > 
> > I don't think a sysadmin would be using tracepoints for
> > troubleshooting this, but okay. My only objective here is exactly
> > what
> > you said, to not pollute kernel log too much with these potentially
> > repetitive messages.
> 
> these types of errors are easily reproduce-able, a sysadmin can see and
> report the errno and the extack message, and in case it is really
> required, the support or development team can ask to turn on trace-
> points or debug and reproduce .. 

Roger that, thanks Saeed.
Roi Dayan April 12, 2020, 8:14 a.m. UTC | #7
On 2020-04-09 1:42 AM, marcelo.leitner@gmail.com wrote:
> On Wed, Apr 08, 2020 at 10:38:52PM +0000, Saeed Mahameed wrote:
>> On Wed, 2020-04-08 at 18:54 -0300, marcelo.leitner@gmail.com wrote:
>>> On Wed, Apr 08, 2020 at 07:51:22PM +0000, Saeed Mahameed wrote:
>>> ...
>>>>>> I understand it is for debug only but i strongly suggest to not
>>>>>> totally
>>>>>> suppress these messages and maybe just move them to tracepoints
>>>>>> buffer
>>>>>> ? for those who would want to really debug .. 
>>>>>>
>>>>>> we already have some tracepoints implemented for en_tc.c 
>>>>>> mlx5/core/diag/en_tc_tracepoints.c, maybe we should define a
>>>>>> tracepoint
>>>>>> for error reporting .. 
>>>>>
>>>>> That, or s/netdev_warn/netdev_dbg/, but both are more hidden to
>>>>> the
>>>>> user than the _once.
>>>>>
>>>>
>>>> i don't see any reason to pollute kernel log with debug messages
>>>> when
>>>> we have tracepoint buffer for en_tc .. 
>>>
>>> So we're agreeing that these need to be changed. Good.
>>
>> I would like to wait for the feedback from the CC'ed mlnx TC
>> developers..
>>
>> I just pinged them, lets see what they think.
>>
>> but i totally agree, TC can support 100k offloads requests per seconds,
>> dumping every possible issue to the kernel log shouldn't be an option,this is not a boot or a fatal error/warning .. 
>>
>>>
>>> I don't think a sysadmin would be using tracepoints for
>>> troubleshooting this, but okay. My only objective here is exactly
>>> what
>>> you said, to not pollute kernel log too much with these potentially
>>> repetitive messages.
>>
>> these types of errors are easily reproduce-able, a sysadmin can see and
>> report the errno and the extack message, and in case it is really
>> required, the support or development team can ask to turn on trace-
>> points or debug and reproduce .. 
> 
> Roger that, thanks Saeed.
> 

Hi Marcelo,

I was somewhat in favor for *_once when first read it, without starting to
enable probe stuff but I guess *_once will become redundant pretty quick.
another option is debugging with tc verbose flag but ovs doesn't support
logging extack errors today.
checking ovs issues on a large system with many bridges/ports/rules without
the tc/driver errors in the log will be very difficult.
in some places we already only use extack without netdev_warn.
so currently in favor in removing the other logs if extack error exists to
avoid flooding the log each time ovs age out an unsupported rule and re-adds it.
i'm also in favor for the trace points to ease debug.

Thanks,
Roi
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 438128dde187d7ec58892c2879c6037f807f576f..1182fba3edbb8cf7bd59557b7ece18765c704186 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1828,8 +1828,8 @@  enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
 				       opt->length * 4)) {
 				NL_SET_ERR_MSG(extack,
 					       "Partial match of tunnel options in chain > 0 isn't supported");
-				netdev_warn(priv->netdev,
-					    "Partial match of tunnel options in chain > 0 isn't supported");
+				netdev_warn_once(priv->netdev,
+						 "Partial match of tunnel options in chain > 0 isn't supported");
 				return -EOPNOTSUPP;
 			}
 		}
@@ -1988,8 +1988,8 @@  static int parse_tunnel_attr(struct mlx5e_priv *priv,
 	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
 		NL_SET_ERR_MSG(extack,
 			       "Chains on tunnel devices isn't supported without register loopback support");
-		netdev_warn(priv->netdev,
-			    "Chains on tunnel devices isn't supported without register loopback support");
+		netdev_warn_once(priv->netdev,
+				 "Chains on tunnel devices isn't supported without register loopback support");
 		return -EOPNOTSUPP;
 	}
 
@@ -2133,8 +2133,8 @@  static int __parse_cls_flower(struct mlx5e_priv *priv,
 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
-		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
-			    dissector->used_keys);
+		netdev_warn_once(priv->netdev, "Unsupported key used: 0x%x\n",
+				 dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
@@ -2484,8 +2484,8 @@  static int parse_cls_flower(struct mlx5e_priv *priv,
 		    esw->offloads.inline_mode < non_tunnel_match_level)) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Flow is not offloaded due to min inline setting");
-			netdev_warn(priv->netdev,
-				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+			netdev_warn_once(priv->netdev,
+					 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
 				    non_tunnel_match_level, esw->offloads.inline_mode);
 			return -EOPNOTSUPP;
 		}
@@ -2885,7 +2885,9 @@  static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
 		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "attempt to offload an unsupported field");
-			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
+			netdev_warn_once(priv->netdev,
+					 "attempt to offload an unsupported field (cmd %d)\n",
+					 cmd);
 			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
 				       16, 1, cmd_masks, sizeof(zero_masks), true);
 			err = -EOPNOTSUPP;
@@ -2912,17 +2914,17 @@  static bool csum_offload_supported(struct mlx5e_priv *priv,
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "TC csum action is only offloaded with pedit");
-		netdev_warn(priv->netdev,
-			    "TC csum action is only offloaded with pedit\n");
+		netdev_warn_once(priv->netdev,
+				 "TC csum action is only offloaded with pedit\n");
 		return false;
 	}
 
 	if (update_flags & ~prot_flags) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "can't offload TC csum action for some header/s");
-		netdev_warn(priv->netdev,
-			    "can't offload TC csum action for some header/s - flags %#x\n",
-			    update_flags);
+		netdev_warn_once(priv->netdev,
+				 "can't offload TC csum action for some header/s - flags %#x\n",
+				 update_flags);
 		return false;
 	}
 
@@ -3224,8 +3226,9 @@  static int parse_tc_nic_actions(struct mlx5e_priv *priv,
 			} else {
 				NL_SET_ERR_MSG_MOD(extack,
 						   "device is not on same HW, can't offload");
-				netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
-					    peer_dev->name);
+				netdev_warn_once(priv->netdev,
+						 "device %s not on same HW, can't offload\n",
+						 peer_dev->name);
 				return -EINVAL;
 			}
 			}
@@ -3754,9 +3757,9 @@  static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
 				NL_SET_ERR_MSG_MOD(extack,
 						   "can't support more output ports, can't offload forwarding");
-				netdev_warn(priv->netdev,
-					    "can't support more than %d output ports, can't offload forwarding\n",
-					    attr->out_count);
+				netdev_warn_once(priv->netdev,
+						 "can't support more than %d output ports, can't offload forwarding\n",
+						 attr->out_count);
 				return -EOPNOTSUPP;
 			}
 
@@ -3821,10 +3824,10 @@  static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
 					NL_SET_ERR_MSG_MOD(extack,
 							   "devices are not on same switch HW, can't offload forwarding");
-					netdev_warn(priv->netdev,
-						    "devices %s %s not on same switch HW, can't offload forwarding\n",
-						    priv->netdev->name,
-						    out_dev->name);
+					netdev_warn_once(priv->netdev,
+							 "devices %s %s not on same switch HW, can't offload forwarding\n",
+							 priv->netdev->name,
+							 out_dev->name);
 					return -EOPNOTSUPP;
 				}
 
@@ -3843,10 +3846,10 @@  static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			} else {
 				NL_SET_ERR_MSG_MOD(extack,
 						   "devices are not on same switch HW, can't offload forwarding");
-				netdev_warn(priv->netdev,
-					    "devices %s %s not on same switch HW, can't offload forwarding\n",
-					    priv->netdev->name,
-					    out_dev->name);
+				netdev_warn_once(priv->netdev,
+						 "devices %s %s not on same switch HW, can't offload forwarding\n",
+						 priv->netdev->name,
+						 out_dev->name);
 				return -EINVAL;
 			}
 			}
@@ -3959,8 +3962,8 @@  static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 
 			NL_SET_ERR_MSG(extack,
 				       "Decap with goto isn't supported");
-			netdev_warn(priv->netdev,
-				    "Decap with goto isn't supported");
+			netdev_warn_once(priv->netdev,
+					 "Decap with goto isn't supported");
 			return -EOPNOTSUPP;
 		}