diff mbox series

[RFC,net-next,v2,5/5] net:openvswitch: add psample support

Message ID 20240408125753.470419-6-amorenoz@redhat.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net: openvswitch: Add sample multicasting. | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 942 this patch: 942
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers fail 5 maintainers not CCed: kuba@kernel.org pabeni@redhat.com edumazet@google.com dev@openvswitch.org pshelar@ovn.org
netdev/build_clang success Errors and warnings before: 953 this patch: 953
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 953 this patch: 953
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 46 this patch: 46
netdev/source_inline success Was 0 now: 0

Commit Message

Adrian Moreno April 8, 2024, 12:57 p.m. UTC
Add a new attribute to the sample action, called
OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
user-defined cookie.

The maximum length of the user-defined cookie is set to 16, same as
tc_cookie to discourage using cookies that will not be offloadable.

When set, the sample action will use psample to multicast the sample.

Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
---
 include/uapi/linux/openvswitch.h | 22 +++++++--
 net/openvswitch/actions.c        | 52 ++++++++++++++++++---
 net/openvswitch/datapath.c       |  2 +-
 net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
 4 files changed, 127 insertions(+), 27 deletions(-)

Comments

Ilya Maximets April 8, 2024, 1:37 p.m. UTC | #1
On 4/8/24 14:57, Adrian Moreno wrote:
> Add a new attribute to the sample action, called
> OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
> user-defined cookie.
> 
> The maximum length of the user-defined cookie is set to 16, same as
> tc_cookie to discourage using cookies that will not be offloadable.
> 
> When set, the sample action will use psample to multicast the sample.
> 
> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
> ---
>  include/uapi/linux/openvswitch.h | 22 +++++++--
>  net/openvswitch/actions.c        | 52 ++++++++++++++++++---
>  net/openvswitch/datapath.c       |  2 +-
>  net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
>  4 files changed, 127 insertions(+), 27 deletions(-)

This cpatch is missing a few bits:
 - Update for Documentation/netlink/specs/ovs_flow.yaml
 - Maybe update for tools/testing/selftests/net/openvswitch/ovs-dpctl.py
 - Maybe some basic selftests.

> 
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index efc82c318fa2..a5a32588f582 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -646,15 +646,24 @@ enum ovs_flow_attr {
>   * %UINT32_MAX samples all packets and intermediate values sample intermediate
>   * fractions of packets.
>   * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
> - * Actions are passed as nested attributes.
> + * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
> + * is not set.

'is set' probably.

> + * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
> + * OVS_SAMPLE_ATTR_ACTIONS is not set.

Same here.

>   *
> - * Executes the specified actions with the given probability on a per-packet
> - * basis.
> + * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
> + * specified.
> + *
> + * Executes the specified actions and/or sends the packet to psample
> + * with the given probability on a per-packet basis.
>   */
>  enum ovs_sample_attr {
>  	OVS_SAMPLE_ATTR_UNSPEC,
>  	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>  	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
> +	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
> +				      * by the user-provided cookie.
> +				      */
>  	__OVS_SAMPLE_ATTR_MAX,
>  
>  #ifdef __KERNEL__
> @@ -675,6 +684,13 @@ struct sample_arg {
>  };
>  #endif
>  
> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
> +struct ovs_psample {
> +	__u32 group_id;		/* The group used for packet sampling. */
> +	__u32 user_cookie_len;	/* The length of the user-provided cookie. */
> +	__u8 user_cookie[];	/* The user-provided cookie. */
> +};

Structures are not a good approach for modern netlink.
use nested attributes instead.  This way we can also
eliminate the need for variable-length array and the
length field, if the length can be taken from a netlink
attribute directly, e.g. similar to NLA_BINARY in tc.

If necessary, there could be a structure in the private
header to store the data for internal use.

> +
>  /**
>   * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
>   * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 6fcd7e2ca81f..45d2b325b76a 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -24,6 +24,7 @@
>  #include <net/checksum.h>
>  #include <net/dsfield.h>
>  #include <net/mpls.h>
> +#include <net/psample.h>
>  #include <net/sctp/checksum.h>
>  
>  #include "datapath.h"
> @@ -1025,6 +1026,31 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>  	return 0;
>  }
>  
> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
> +			      struct ovs_psample *psample, struct sk_buff *skb,
> +			      u32 rate)
> +{
> +	struct psample_group psample_group = {};
> +	struct psample_metadata md = {};
> +	struct vport *input_vport;
> +
> +	psample_group.group_num = psample->group_id;
> +	psample_group.net = ovs_dp_get_net(dp);
> +
> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
> +	if (!input_vport)
> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
> +
> +	md.in_ifindex = input_vport->dev->ifindex;
> +	md.user_cookie = psample->user_cookie;
> +	md.user_cookie_len = psample->user_cookie_len;
> +	md.trunc_size = skb->len;
> +
> +	psample_sample_packet(&psample_group, skb, rate, &md);
> +
> +	return 0;
> +}
> +
>  /* When 'last' is true, sample() should always consume the 'skb'.
>   * Otherwise, sample() should keep 'skb' intact regardless what
>   * actions are executed within sample().
> @@ -1033,16 +1059,17 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>  		  struct sw_flow_key *key, const struct nlattr *attr,
>  		  bool last)
>  {
> -	struct nlattr *actions;
> +	const struct sample_arg *arg;
>  	struct nlattr *sample_arg;
>  	int rem = nla_len(attr);
> -	const struct sample_arg *arg;
> +	struct nlattr *next;
>  	bool clone_flow_key;
> +	int ret;
>  
>  	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>  	sample_arg = nla_data(attr);
>  	arg = nla_data(sample_arg);
> -	actions = nla_next(sample_arg, &rem);
> +	next = nla_next(sample_arg, &rem);
>  
>  	if ((arg->probability != U32_MAX) &&
>  	    (!arg->probability || get_random_u32() > arg->probability)) {
> @@ -1051,9 +1078,22 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>  		return 0;
>  	}
>  
> -	clone_flow_key = !arg->exec;
> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
> -			     clone_flow_key);
> +	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {

Maybe add a commnet that OVS_SAMPLE_ATTR_PSAMPLE is always a sencond
argument when present.

Is there a better way to handle this?

> +		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
> +					 arg->probability);
> +		if (last)
> +			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
> +		if (ret)
> +			return ret;
> +		next = nla_next(next, &rem);
> +	}
> +
> +	if (nla_ok(next, rem)) {
> +		clone_flow_key = !arg->exec;
> +		ret = clone_execute(dp, skb, key, 0, next, rem, last,
> +				    clone_flow_key);
> +	}
> +	return ret;
>  }
>  
>  /* When 'last' is true, clone() should always consume the 'skb'.
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 99d72543abd3..b5b560c2e74b 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -976,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	struct sw_flow_match match;
>  	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>  	int error;
> -	bool log = !a[OVS_FLOW_ATTR_PROBE];
> +	bool log = true;

Debugging artifact?

>  
>  	/* Must have key and actions. */
>  	error = -EINVAL;
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index f224d9bcea5e..f540686271b7 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -2381,8 +2381,12 @@ static void ovs_nla_free_sample_action(const struct nlattr *action)
>  
>  	switch (nla_type(a)) {
>  	case OVS_SAMPLE_ATTR_ARG:
> -		/* The real list of actions follows this attribute. */

Please, don't remove this comment.  Maybe extend it instead.

>  		a = nla_next(a, &rem);
> +
> +		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
> +		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
> +			a = nla_next(a, &rem);
> +
>  		ovs_nla_free_nested_actions(a, rem);
>  		break;
>  	}
> @@ -2561,6 +2565,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  				  u32 mpls_label_count, bool log,
>  				  u32 depth);
>  
> +static int copy_action(const struct nlattr *from,
> +		       struct sw_flow_actions **sfa, bool log);
> +
>  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  				    const struct sw_flow_key *key,
>  				    struct sw_flow_actions **sfa,
> @@ -2569,10 +2576,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  				    u32 depth)
>  {
>  	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
> -	const struct nlattr *probability, *actions;
> +	const struct nlattr *probability, *actions, *psample;
>  	const struct nlattr *a;
> -	int rem, start, err;
>  	struct sample_arg arg;
> +	int rem, start, err;
>  
>  	memset(attrs, 0, sizeof(attrs));
>  	nla_for_each_nested(a, attr, rem) {
> @@ -2589,7 +2596,23 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  		return -EINVAL;
>  
>  	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
> +		return -EINVAL;
> +
> +	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
> +	if (psample) {
> +		struct ovs_psample *ovs_ps;
> +
> +		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
> +			return -EINVAL;
> +
> +		ovs_ps = nla_data(psample);
> +		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
> +		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
> +			return -EINVAL;
> +	}
> +
> +	if (!psample && !actions)
>  		return -EINVAL;
>  
>  	/* validation done, copy sample action. */
> @@ -2608,7 +2631,9 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  	 * If the sample is the last action, it can always be excuted
>  	 * rather than deferred.
>  	 */
> -	arg.exec = last || !actions_may_change_flow(actions);
> +	if (actions)
> +		arg.exec = last || !actions_may_change_flow(actions);

'arg.exec' will remain uninitialized.

> +
>  	arg.probability = nla_get_u32(probability);
>  
>  	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
> @@ -2616,10 +2641,17 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>  	if (err)
>  		return err;
>  
> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
> -				     eth_type, vlan_tci, mpls_label_count, log,
> -				     depth + 1);
> +	if (psample)
> +		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
> +					 nla_data(psample), nla_len(psample),
> +					 log);
> +	if (err)

Can be used uninitialized.

> +		return err;
>  
> +	if (actions)
> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
> +					     eth_type, vlan_tci,
> +					     mpls_label_count, log, depth + 1);
>  	if (err)
>  		return err;
>  
> @@ -3538,7 +3570,7 @@ static int sample_action_to_attr(const struct nlattr *attr,
>  	struct nlattr *start, *ac_start = NULL, *sample_arg;
>  	int err = 0, rem = nla_len(attr);
>  	const struct sample_arg *arg;
> -	struct nlattr *actions;
> +	struct nlattr *next;
>  
>  	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
>  	if (!start)
> @@ -3546,27 +3578,39 @@ static int sample_action_to_attr(const struct nlattr *attr,
>  
>  	sample_arg = nla_data(attr);
>  	arg = nla_data(sample_arg);
> -	actions = nla_next(sample_arg, &rem);
> +	next = nla_next(sample_arg, &rem);
>  
>  	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
>  		err = -EMSGSIZE;
>  		goto out;
>  	}
>  
> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> -	if (!ac_start) {
> -		err = -EMSGSIZE;
> -		goto out;
> +	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
> +		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
> +			    nla_data(next))) {
> +			err = -EMSGSIZE;
> +			goto out;
> +		}
> +		next = nla_next(next, &rem);
>  	}
>  
> -	err = ovs_nla_put_actions(actions, rem, skb);
> +	if (nla_ok(next, rem)) {
> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
> +		if (!ac_start) {
> +			err = -EMSGSIZE;
> +			goto out;
> +		}
> +		err = ovs_nla_put_actions(next, rem, skb);
> +	}
>  
>  out:
>  	if (err) {
> -		nla_nest_cancel(skb, ac_start);
> +		if (ac_start)
> +			nla_nest_cancel(skb, ac_start);
>  		nla_nest_cancel(skb, start);
>  	} else {
> -		nla_nest_end(skb, ac_start);
> +		if (ac_start)
> +			nla_nest_end(skb, ac_start);
>  		nla_nest_end(skb, start);
>  	}
>
Adrian Moreno April 8, 2024, 7:48 p.m. UTC | #2
On 4/8/24 15:37, Ilya Maximets wrote:
> On 4/8/24 14:57, Adrian Moreno wrote:
>> Add a new attribute to the sample action, called
>> OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
>> user-defined cookie.
>>
>> The maximum length of the user-defined cookie is set to 16, same as
>> tc_cookie to discourage using cookies that will not be offloadable.
>>
>> When set, the sample action will use psample to multicast the sample.
>>
>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>> ---
>>   include/uapi/linux/openvswitch.h | 22 +++++++--
>>   net/openvswitch/actions.c        | 52 ++++++++++++++++++---
>>   net/openvswitch/datapath.c       |  2 +-
>>   net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
>>   4 files changed, 127 insertions(+), 27 deletions(-)
> 
> This cpatch is missing a few bits:
>   - Update for Documentation/netlink/specs/ovs_flow.yaml
>   - Maybe update for tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>   - Maybe some basic selftests.
> 

Absolutely. I surely plan to add it on the first non-rfc version.

>>
>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>> index efc82c318fa2..a5a32588f582 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -646,15 +646,24 @@ enum ovs_flow_attr {
>>    * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>    * fractions of packets.
>>    * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>> - * Actions are passed as nested attributes.
>> + * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
>> + * is not set.
> 
> 'is set' probably. >
>> + * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
>> + * OVS_SAMPLE_ATTR_ACTIONS is not set.
> 
> Same here.
>

Good catch, I rewrote those comments a bunch of times and the were left 
expressing the exact opposite!


>>    *
>> - * Executes the specified actions with the given probability on a per-packet
>> - * basis.
>> + * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
>> + * specified.
>> + *
>> + * Executes the specified actions and/or sends the packet to psample
>> + * with the given probability on a per-packet basis.
>>    */
>>   enum ovs_sample_attr {
>>   	OVS_SAMPLE_ATTR_UNSPEC,
>>   	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>   	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>> +	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
>> +				      * by the user-provided cookie.
>> +				      */
>>   	__OVS_SAMPLE_ATTR_MAX,
>>   
>>   #ifdef __KERNEL__
>> @@ -675,6 +684,13 @@ struct sample_arg {
>>   };
>>   #endif
>>   
>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>> +struct ovs_psample {
>> +	__u32 group_id;		/* The group used for packet sampling. */
>> +	__u32 user_cookie_len;	/* The length of the user-provided cookie. */
>> +	__u8 user_cookie[];	/* The user-provided cookie. */
>> +};
> 
> Structures are not a good approach for modern netlink.
> use nested attributes instead.  This way we can also
> eliminate the need for variable-length array and the
> length field, if the length can be taken from a netlink
> attribute directly, e.g. similar to NLA_BINARY in tc.
> 
> If necessary, there could be a structure in the private
> header to store the data for internal use.
> 

Interesting. Thanks for the suggestion. I think it will also help action validation.

>> +
>>   /**
>>    * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
>>    * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>> index 6fcd7e2ca81f..45d2b325b76a 100644
>> --- a/net/openvswitch/actions.c
>> +++ b/net/openvswitch/actions.c
>> @@ -24,6 +24,7 @@
>>   #include <net/checksum.h>
>>   #include <net/dsfield.h>
>>   #include <net/mpls.h>
>> +#include <net/psample.h>
>>   #include <net/sctp/checksum.h>
>>   
>>   #include "datapath.h"
>> @@ -1025,6 +1026,31 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>   	return 0;
>>   }
>>   
>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>> +			      struct ovs_psample *psample, struct sk_buff *skb,
>> +			      u32 rate)
>> +{
>> +	struct psample_group psample_group = {};
>> +	struct psample_metadata md = {};
>> +	struct vport *input_vport;
>> +
>> +	psample_group.group_num = psample->group_id;
>> +	psample_group.net = ovs_dp_get_net(dp);
>> +
>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>> +	if (!input_vport)
>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>> +
>> +	md.in_ifindex = input_vport->dev->ifindex;
>> +	md.user_cookie = psample->user_cookie;
>> +	md.user_cookie_len = psample->user_cookie_len;
>> +	md.trunc_size = skb->len;
>> +
>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>> +
>> +	return 0;
>> +}
>> +
>>   /* When 'last' is true, sample() should always consume the 'skb'.
>>    * Otherwise, sample() should keep 'skb' intact regardless what
>>    * actions are executed within sample().
>> @@ -1033,16 +1059,17 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>   		  struct sw_flow_key *key, const struct nlattr *attr,
>>   		  bool last)
>>   {
>> -	struct nlattr *actions;
>> +	const struct sample_arg *arg;
>>   	struct nlattr *sample_arg;
>>   	int rem = nla_len(attr);
>> -	const struct sample_arg *arg;
>> +	struct nlattr *next;
>>   	bool clone_flow_key;
>> +	int ret;
>>   
>>   	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>   	sample_arg = nla_data(attr);
>>   	arg = nla_data(sample_arg);
>> -	actions = nla_next(sample_arg, &rem);
>> +	next = nla_next(sample_arg, &rem);
>>   
>>   	if ((arg->probability != U32_MAX) &&
>>   	    (!arg->probability || get_random_u32() > arg->probability)) {
>> @@ -1051,9 +1078,22 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>   		return 0;
>>   	}
>>   
>> -	clone_flow_key = !arg->exec;
>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>> -			     clone_flow_key);
>> +	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {
> 
> Maybe add a commnet that OVS_SAMPLE_ATTR_PSAMPLE is always a sencond
> argument when present.
> 
> Is there a better way to handle this?
> 

I also dislike it. The fact that actions are not nested but concatenated to 
makes the internal representation a bit flaky.

The alternative I considered was adding the group_id and cookie to the 
internal-only OVS_SAMPLE_ATTR_ARG. However, now I wonder:
- Should we also use nested attributes instead of structs for this internal one?

And, probably off-topic: what's the story behind using netlink attributes to 
store action arguments internally? Has it ever been discussed using a union for 
instance?

>> +		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
>> +					 arg->probability);
>> +		if (last)
>> +			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>> +		if (ret)
>> +			return ret;
>> +		next = nla_next(next, &rem);
>> +	}
>> +
>> +	if (nla_ok(next, rem)) {
>> +		clone_flow_key = !arg->exec;
>> +		ret = clone_execute(dp, skb, key, 0, next, rem, last,
>> +				    clone_flow_key);
>> +	}
>> +	return ret;
>>   }
>>   
>>   /* When 'last' is true, clone() should always consume the 'skb'.
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index 99d72543abd3..b5b560c2e74b 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -976,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>   	struct sw_flow_match match;
>>   	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>>   	int error;
>> -	bool log = !a[OVS_FLOW_ATTR_PROBE];
>> +	bool log = true;
> 
> Debugging artifact?
> 

Yep, sorry.

>>   
>>   	/* Must have key and actions. */
>>   	error = -EINVAL;
>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>> index f224d9bcea5e..f540686271b7 100644
>> --- a/net/openvswitch/flow_netlink.c
>> +++ b/net/openvswitch/flow_netlink.c
>> @@ -2381,8 +2381,12 @@ static void ovs_nla_free_sample_action(const struct nlattr *action)
>>   
>>   	switch (nla_type(a)) {
>>   	case OVS_SAMPLE_ATTR_ARG:
>> -		/* The real list of actions follows this attribute. */
> 
> Please, don't remove this comment.  Maybe extend it instead.
> 
>>   		a = nla_next(a, &rem);
>> +
>> +		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
>> +		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
>> +			a = nla_next(a, &rem);
>> +
>>   		ovs_nla_free_nested_actions(a, rem);
>>   		break;
>>   	}
>> @@ -2561,6 +2565,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>   				  u32 mpls_label_count, bool log,
>>   				  u32 depth);
>>   
>> +static int copy_action(const struct nlattr *from,
>> +		       struct sw_flow_actions **sfa, bool log);
>> +
>>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   				    const struct sw_flow_key *key,
>>   				    struct sw_flow_actions **sfa,
>> @@ -2569,10 +2576,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   				    u32 depth)
>>   {
>>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>> -	const struct nlattr *probability, *actions;
>> +	const struct nlattr *probability, *actions, *psample;
>>   	const struct nlattr *a;
>> -	int rem, start, err;
>>   	struct sample_arg arg;
>> +	int rem, start, err;
>>   
>>   	memset(attrs, 0, sizeof(attrs));
>>   	nla_for_each_nested(a, attr, rem) {
>> @@ -2589,7 +2596,23 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   		return -EINVAL;
>>   
>>   	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>> +		return -EINVAL;
>> +
>> +	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
>> +	if (psample) {
>> +		struct ovs_psample *ovs_ps;
>> +
>> +		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
>> +			return -EINVAL;
>> +
>> +		ovs_ps = nla_data(psample);
>> +		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
>> +		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
>> +			return -EINVAL;
>> +	}
>> +
>> +	if (!psample && !actions)
>>   		return -EINVAL;
>>   
>>   	/* validation done, copy sample action. */
>> @@ -2608,7 +2631,9 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   	 * If the sample is the last action, it can always be excuted
>>   	 * rather than deferred.
>>   	 */
>> -	arg.exec = last || !actions_may_change_flow(actions);
>> +	if (actions)
>> +		arg.exec = last || !actions_may_change_flow(actions);
> 
> 'arg.exec' will remain uninitialized.
> 

Yes. I just wanted to avoid the call to actions_may_change_flow(NULL) in which 
case arg.exec is not used. I'll make sure to zero-initialize it nevertheless.

>> +
>>   	arg.probability = nla_get_u32(probability);
>>   
>>   	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>> @@ -2616,10 +2641,17 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>   	if (err)
>>   		return err;
>>   
>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> -				     eth_type, vlan_tci, mpls_label_count, log,
>> -				     depth + 1);
>> +	if (psample)
>> +		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
>> +					 nla_data(psample), nla_len(psample),
>> +					 log);
>> +	if (err)
> 
> Can be used uninitialized.
> 

You're right, it should be inside the if above, although err should have been 
initialized to output of ovs_nla_add_action.

>> +		return err;
>>   
>> +	if (actions)
>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>> +					     eth_type, vlan_tci,
>> +					     mpls_label_count, log, depth + 1);
>>   	if (err)
>>   		return err;
>>   
>> @@ -3538,7 +3570,7 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>   	struct nlattr *start, *ac_start = NULL, *sample_arg;
>>   	int err = 0, rem = nla_len(attr);
>>   	const struct sample_arg *arg;
>> -	struct nlattr *actions;
>> +	struct nlattr *next;
>>   
>>   	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
>>   	if (!start)
>> @@ -3546,27 +3578,39 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>   
>>   	sample_arg = nla_data(attr);
>>   	arg = nla_data(sample_arg);
>> -	actions = nla_next(sample_arg, &rem);
>> +	next = nla_next(sample_arg, &rem);
>>   
>>   	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
>>   		err = -EMSGSIZE;
>>   		goto out;
>>   	}
>>   
>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> -	if (!ac_start) {
>> -		err = -EMSGSIZE;
>> -		goto out;
>> +	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
>> +		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
>> +			    nla_data(next))) {
>> +			err = -EMSGSIZE;
>> +			goto out;
>> +		}
>> +		next = nla_next(next, &rem);
>>   	}
>>   
>> -	err = ovs_nla_put_actions(actions, rem, skb);
>> +	if (nla_ok(next, rem)) {
>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>> +		if (!ac_start) {
>> +			err = -EMSGSIZE;
>> +			goto out;
>> +		}
>> +		err = ovs_nla_put_actions(next, rem, skb);
>> +	}
>>   
>>   out:
>>   	if (err) {
>> -		nla_nest_cancel(skb, ac_start);
>> +		if (ac_start)
>> +			nla_nest_cancel(skb, ac_start);
>>   		nla_nest_cancel(skb, start);
>>   	} else {
>> -		nla_nest_end(skb, ac_start);
>> +		if (ac_start)
>> +			nla_nest_end(skb, ac_start);
>>   		nla_nest_end(skb, start);
>>   	}
>>   
>
Ilya Maximets April 8, 2024, 8:40 p.m. UTC | #3
On 4/8/24 21:48, Adrian Moreno wrote:
> 
> 
> On 4/8/24 15:37, Ilya Maximets wrote:
>> On 4/8/24 14:57, Adrian Moreno wrote:
>>> Add a new attribute to the sample action, called
>>> OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
>>> user-defined cookie.
>>>
>>> The maximum length of the user-defined cookie is set to 16, same as
>>> tc_cookie to discourage using cookies that will not be offloadable.
>>>
>>> When set, the sample action will use psample to multicast the sample.
>>>
>>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>>> ---
>>>   include/uapi/linux/openvswitch.h | 22 +++++++--
>>>   net/openvswitch/actions.c        | 52 ++++++++++++++++++---
>>>   net/openvswitch/datapath.c       |  2 +-
>>>   net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
>>>   4 files changed, 127 insertions(+), 27 deletions(-)
>>
>> This cpatch is missing a few bits:
>>   - Update for Documentation/netlink/specs/ovs_flow.yaml
>>   - Maybe update for tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>>   - Maybe some basic selftests.
>>
> 
> Absolutely. I surely plan to add it on the first non-rfc version.
> 
>>>
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index efc82c318fa2..a5a32588f582 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -646,15 +646,24 @@ enum ovs_flow_attr {
>>>    * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>>    * fractions of packets.
>>>    * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>> - * Actions are passed as nested attributes.
>>> + * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
>>> + * is not set.
>>
>> 'is set' probably. >
>>> + * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
>>> + * OVS_SAMPLE_ATTR_ACTIONS is not set.
>>
>> Same here.
>>
> 
> Good catch, I rewrote those comments a bunch of times and the were left 
> expressing the exact opposite!
> 
> 
>>>    *
>>> - * Executes the specified actions with the given probability on a per-packet
>>> - * basis.
>>> + * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
>>> + * specified.
>>> + *
>>> + * Executes the specified actions and/or sends the packet to psample
>>> + * with the given probability on a per-packet basis.
>>>    */
>>>   enum ovs_sample_attr {
>>>   	OVS_SAMPLE_ATTR_UNSPEC,
>>>   	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>>   	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>>> +	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
>>> +				      * by the user-provided cookie.
>>> +				      */
>>>   	__OVS_SAMPLE_ATTR_MAX,
>>>   
>>>   #ifdef __KERNEL__
>>> @@ -675,6 +684,13 @@ struct sample_arg {
>>>   };
>>>   #endif
>>>   
>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>> +struct ovs_psample {
>>> +	__u32 group_id;		/* The group used for packet sampling. */
>>> +	__u32 user_cookie_len;	/* The length of the user-provided cookie. */

Here as well, not sure if u32 makes sense as userspace can't
actually supply a cookie this large via netlink.

>>> +	__u8 user_cookie[];	/* The user-provided cookie. */
>>> +};
>>
>> Structures are not a good approach for modern netlink.
>> use nested attributes instead.  This way we can also
>> eliminate the need for variable-length array and the
>> length field, if the length can be taken from a netlink
>> attribute directly, e.g. similar to NLA_BINARY in tc.
>>
>> If necessary, there could be a structure in the private
>> header to store the data for internal use.
>>
> 
> Interesting. Thanks for the suggestion. I think it will also help action validation.
> 
>>> +
>>>   /**
>>>    * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
>>>    * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index 6fcd7e2ca81f..45d2b325b76a 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -24,6 +24,7 @@
>>>   #include <net/checksum.h>
>>>   #include <net/dsfield.h>
>>>   #include <net/mpls.h>
>>> +#include <net/psample.h>
>>>   #include <net/sctp/checksum.h>
>>>   
>>>   #include "datapath.h"
>>> @@ -1025,6 +1026,31 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>>   	return 0;
>>>   }
>>>   
>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>> +			      struct ovs_psample *psample, struct sk_buff *skb,
>>> +			      u32 rate)
>>> +{
>>> +	struct psample_group psample_group = {};
>>> +	struct psample_metadata md = {};
>>> +	struct vport *input_vport;
>>> +
>>> +	psample_group.group_num = psample->group_id;
>>> +	psample_group.net = ovs_dp_get_net(dp);
>>> +
>>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>> +	if (!input_vport)
>>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>> +
>>> +	md.in_ifindex = input_vport->dev->ifindex;
>>> +	md.user_cookie = psample->user_cookie;
>>> +	md.user_cookie_len = psample->user_cookie_len;
>>> +	md.trunc_size = skb->len;
>>> +
>>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>   /* When 'last' is true, sample() should always consume the 'skb'.
>>>    * Otherwise, sample() should keep 'skb' intact regardless what
>>>    * actions are executed within sample().
>>> @@ -1033,16 +1059,17 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		  struct sw_flow_key *key, const struct nlattr *attr,
>>>   		  bool last)
>>>   {
>>> -	struct nlattr *actions;
>>> +	const struct sample_arg *arg;
>>>   	struct nlattr *sample_arg;
>>>   	int rem = nla_len(attr);
>>> -	const struct sample_arg *arg;
>>> +	struct nlattr *next;
>>>   	bool clone_flow_key;
>>> +	int ret;
>>>   
>>>   	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>>   	sample_arg = nla_data(attr);
>>>   	arg = nla_data(sample_arg);
>>> -	actions = nla_next(sample_arg, &rem);
>>> +	next = nla_next(sample_arg, &rem);
>>>   
>>>   	if ((arg->probability != U32_MAX) &&
>>>   	    (!arg->probability || get_random_u32() > arg->probability)) {
>>> @@ -1051,9 +1078,22 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>   		return 0;
>>>   	}
>>>   
>>> -	clone_flow_key = !arg->exec;
>>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>>> -			     clone_flow_key);
>>> +	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {
>>
>> Maybe add a commnet that OVS_SAMPLE_ATTR_PSAMPLE is always a sencond
>> argument when present.
>>
>> Is there a better way to handle this?
>>
> 
> I also dislike it. The fact that actions are not nested but concatenated to 
> makes the internal representation a bit flaky.
> 
> The alternative I considered was adding the group_id and cookie to the 
> internal-only OVS_SAMPLE_ATTR_ARG. However, now I wonder:
> - Should we also use nested attributes instead of structs for this internal one?
> 
> And, probably off-topic: what's the story behind using netlink attributes to 
> store action arguments internally? Has it ever been discussed using a union for 
> instance?

I'm not sure why it was originally done this way, but it is probably
the most efficient of convenient ways to pack a tree-like structure.
If we had a union, we would likely need a tree of actions with each
action bing a separately allocated node, since we have clone() and
even check_pkt_len() or other actions that can fork the pipeline.
Or we'll need to use some dummy actions as parethesis, prectically
emulating what we already have with netlink, but with structures.

Netlink format is fast to scan, since it's in a single liner chunk
of memory.  Tree or list structure may have higher memory footprint
and be slower to iterate due to cache misses.

There might be a better way to store all this for sure, but will
require some careful performance and memory consumption testing.

> 
>>> +		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
>>> +					 arg->probability);
>>> +		if (last)
>>> +			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>> +		if (ret)
>>> +			return ret;
>>> +		next = nla_next(next, &rem);
>>> +	}
>>> +
>>> +	if (nla_ok(next, rem)) {
>>> +		clone_flow_key = !arg->exec;
>>> +		ret = clone_execute(dp, skb, key, 0, next, rem, last,
>>> +				    clone_flow_key);
>>> +	}
>>> +	return ret;
>>>   }
>>>   
>>>   /* When 'last' is true, clone() should always consume the 'skb'.
>>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>>> index 99d72543abd3..b5b560c2e74b 100644
>>> --- a/net/openvswitch/datapath.c
>>> +++ b/net/openvswitch/datapath.c
>>> @@ -976,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>   	struct sw_flow_match match;
>>>   	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>>>   	int error;
>>> -	bool log = !a[OVS_FLOW_ATTR_PROBE];
>>> +	bool log = true;
>>
>> Debugging artifact?
>>
> 
> Yep, sorry.
> 
>>>   
>>>   	/* Must have key and actions. */
>>>   	error = -EINVAL;
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index f224d9bcea5e..f540686271b7 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -2381,8 +2381,12 @@ static void ovs_nla_free_sample_action(const struct nlattr *action)
>>>   
>>>   	switch (nla_type(a)) {
>>>   	case OVS_SAMPLE_ATTR_ARG:
>>> -		/* The real list of actions follows this attribute. */
>>
>> Please, don't remove this comment.  Maybe extend it instead.
>>
>>>   		a = nla_next(a, &rem);
>>> +
>>> +		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
>>> +		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
>>> +			a = nla_next(a, &rem);
>>> +
>>>   		ovs_nla_free_nested_actions(a, rem);
>>>   		break;
>>>   	}
>>> @@ -2561,6 +2565,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>   				  u32 mpls_label_count, bool log,
>>>   				  u32 depth);
>>>   
>>> +static int copy_action(const struct nlattr *from,
>>> +		       struct sw_flow_actions **sfa, bool log);
>>> +
>>>   static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    const struct sw_flow_key *key,
>>>   				    struct sw_flow_actions **sfa,
>>> @@ -2569,10 +2576,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   				    u32 depth)
>>>   {
>>>   	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>> -	const struct nlattr *probability, *actions;
>>> +	const struct nlattr *probability, *actions, *psample;
>>>   	const struct nlattr *a;
>>> -	int rem, start, err;
>>>   	struct sample_arg arg;
>>> +	int rem, start, err;
>>>   
>>>   	memset(attrs, 0, sizeof(attrs));
>>>   	nla_for_each_nested(a, attr, rem) {
>>> @@ -2589,7 +2596,23 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   		return -EINVAL;
>>>   
>>>   	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>> +		return -EINVAL;
>>> +
>>> +	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
>>> +	if (psample) {
>>> +		struct ovs_psample *ovs_ps;
>>> +
>>> +		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
>>> +			return -EINVAL;
>>> +
>>> +		ovs_ps = nla_data(psample);
>>> +		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
>>> +		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
>>> +			return -EINVAL;
>>> +	}
>>> +
>>> +	if (!psample && !actions)
>>>   		return -EINVAL;
>>>   
>>>   	/* validation done, copy sample action. */
>>> @@ -2608,7 +2631,9 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	 * If the sample is the last action, it can always be excuted
>>>   	 * rather than deferred.
>>>   	 */
>>> -	arg.exec = last || !actions_may_change_flow(actions);
>>> +	if (actions)
>>> +		arg.exec = last || !actions_may_change_flow(actions);
>>
>> 'arg.exec' will remain uninitialized.
>>
> 
> Yes. I just wanted to avoid the call to actions_may_change_flow(NULL) in which 
> case arg.exec is not used. I'll make sure to zero-initialize it nevertheless.
> 
>>> +
>>>   	arg.probability = nla_get_u32(probability);
>>>   
>>>   	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>> @@ -2616,10 +2641,17 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>   	if (err)
>>>   		return err;
>>>   
>>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> -				     eth_type, vlan_tci, mpls_label_count, log,
>>> -				     depth + 1);
>>> +	if (psample)
>>> +		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
>>> +					 nla_data(psample), nla_len(psample),
>>> +					 log);
>>> +	if (err)
>>
>> Can be used uninitialized.
>>
> 
> You're right, it should be inside the if above, although err should have been 
> initialized to output of ovs_nla_add_action.

Ah, I missed the previous assingnent.  But yes, it is still a little strange
to handle errors this way.

> 
>>> +		return err;
>>>   
>>> +	if (actions)
>>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>> +					     eth_type, vlan_tci,
>>> +					     mpls_label_count, log, depth + 1);
>>>   	if (err)
>>>   		return err;
>>>   
>>> @@ -3538,7 +3570,7 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>   	struct nlattr *start, *ac_start = NULL, *sample_arg;
>>>   	int err = 0, rem = nla_len(attr);
>>>   	const struct sample_arg *arg;
>>> -	struct nlattr *actions;
>>> +	struct nlattr *next;
>>>   
>>>   	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
>>>   	if (!start)
>>> @@ -3546,27 +3578,39 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>   
>>>   	sample_arg = nla_data(attr);
>>>   	arg = nla_data(sample_arg);
>>> -	actions = nla_next(sample_arg, &rem);
>>> +	next = nla_next(sample_arg, &rem);
>>>   
>>>   	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
>>>   		err = -EMSGSIZE;
>>>   		goto out;
>>>   	}
>>>   
>>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> -	if (!ac_start) {
>>> -		err = -EMSGSIZE;
>>> -		goto out;
>>> +	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
>>> +		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
>>> +			    nla_data(next))) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +		next = nla_next(next, &rem);
>>>   	}
>>>   
>>> -	err = ovs_nla_put_actions(actions, rem, skb);
>>> +	if (nla_ok(next, rem)) {
>>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>> +		if (!ac_start) {
>>> +			err = -EMSGSIZE;
>>> +			goto out;
>>> +		}
>>> +		err = ovs_nla_put_actions(next, rem, skb);
>>> +	}
>>>   
>>>   out:
>>>   	if (err) {
>>> -		nla_nest_cancel(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_cancel(skb, ac_start);
>>>   		nla_nest_cancel(skb, start);
>>>   	} else {
>>> -		nla_nest_end(skb, ac_start);
>>> +		if (ac_start)
>>> +			nla_nest_end(skb, ac_start);
>>>   		nla_nest_end(skb, start);
>>>   	}
>>>   
>>
>
Adrian Moreno April 9, 2024, 8:16 a.m. UTC | #4
On 4/8/24 22:40, Ilya Maximets wrote:
> On 4/8/24 21:48, Adrian Moreno wrote:
>>
>>
>> On 4/8/24 15:37, Ilya Maximets wrote:
>>> On 4/8/24 14:57, Adrian Moreno wrote:
>>>> Add a new attribute to the sample action, called
>>>> OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
>>>> user-defined cookie.
>>>>
>>>> The maximum length of the user-defined cookie is set to 16, same as
>>>> tc_cookie to discourage using cookies that will not be offloadable.
>>>>
>>>> When set, the sample action will use psample to multicast the sample.
>>>>
>>>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>>>> ---
>>>>    include/uapi/linux/openvswitch.h | 22 +++++++--
>>>>    net/openvswitch/actions.c        | 52 ++++++++++++++++++---
>>>>    net/openvswitch/datapath.c       |  2 +-
>>>>    net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
>>>>    4 files changed, 127 insertions(+), 27 deletions(-)
>>>
>>> This cpatch is missing a few bits:
>>>    - Update for Documentation/netlink/specs/ovs_flow.yaml
>>>    - Maybe update for tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>>>    - Maybe some basic selftests.
>>>
>>
>> Absolutely. I surely plan to add it on the first non-rfc version.
>>
>>>>
>>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>>> index efc82c318fa2..a5a32588f582 100644
>>>> --- a/include/uapi/linux/openvswitch.h
>>>> +++ b/include/uapi/linux/openvswitch.h
>>>> @@ -646,15 +646,24 @@ enum ovs_flow_attr {
>>>>     * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>>>     * fractions of packets.
>>>>     * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>>> - * Actions are passed as nested attributes.
>>>> + * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
>>>> + * is not set.
>>>
>>> 'is set' probably. >
>>>> + * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
>>>> + * OVS_SAMPLE_ATTR_ACTIONS is not set.
>>>
>>> Same here.
>>>
>>
>> Good catch, I rewrote those comments a bunch of times and the were left
>> expressing the exact opposite!
>>
>>
>>>>     *
>>>> - * Executes the specified actions with the given probability on a per-packet
>>>> - * basis.
>>>> + * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
>>>> + * specified.
>>>> + *
>>>> + * Executes the specified actions and/or sends the packet to psample
>>>> + * with the given probability on a per-packet basis.
>>>>     */
>>>>    enum ovs_sample_attr {
>>>>    	OVS_SAMPLE_ATTR_UNSPEC,
>>>>    	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>>>    	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>>>> +	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
>>>> +				      * by the user-provided cookie.
>>>> +				      */
>>>>    	__OVS_SAMPLE_ATTR_MAX,
>>>>    
>>>>    #ifdef __KERNEL__
>>>> @@ -675,6 +684,13 @@ struct sample_arg {
>>>>    };
>>>>    #endif
>>>>    
>>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>>> +struct ovs_psample {
>>>> +	__u32 group_id;		/* The group used for packet sampling. */
>>>> +	__u32 user_cookie_len;	/* The length of the user-provided cookie. */
> 
> Here as well, not sure if u32 makes sense as userspace can't
> actually supply a cookie this large via netlink.
> 
>>>> +	__u8 user_cookie[];	/* The user-provided cookie. */
>>>> +};
>>>
>>> Structures are not a good approach for modern netlink.
>>> use nested attributes instead.  This way we can also
>>> eliminate the need for variable-length array and the
>>> length field, if the length can be taken from a netlink
>>> attribute directly, e.g. similar to NLA_BINARY in tc.
>>>
>>> If necessary, there could be a structure in the private
>>> header to store the data for internal use.
>>>
>>
>> Interesting. Thanks for the suggestion. I think it will also help action validation.
>>
>>>> +
>>>>    /**
>>>>     * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
>>>>     * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
>>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>>> index 6fcd7e2ca81f..45d2b325b76a 100644
>>>> --- a/net/openvswitch/actions.c
>>>> +++ b/net/openvswitch/actions.c
>>>> @@ -24,6 +24,7 @@
>>>>    #include <net/checksum.h>
>>>>    #include <net/dsfield.h>
>>>>    #include <net/mpls.h>
>>>> +#include <net/psample.h>
>>>>    #include <net/sctp/checksum.h>
>>>>    
>>>>    #include "datapath.h"
>>>> @@ -1025,6 +1026,31 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>>>    	return 0;
>>>>    }
>>>>    
>>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>>> +			      struct ovs_psample *psample, struct sk_buff *skb,
>>>> +			      u32 rate)
>>>> +{
>>>> +	struct psample_group psample_group = {};
>>>> +	struct psample_metadata md = {};
>>>> +	struct vport *input_vport;
>>>> +
>>>> +	psample_group.group_num = psample->group_id;
>>>> +	psample_group.net = ovs_dp_get_net(dp);
>>>> +
>>>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>>> +	if (!input_vport)
>>>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>>> +
>>>> +	md.in_ifindex = input_vport->dev->ifindex;
>>>> +	md.user_cookie = psample->user_cookie;
>>>> +	md.user_cookie_len = psample->user_cookie_len;
>>>> +	md.trunc_size = skb->len;
>>>> +
>>>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>>    /* When 'last' is true, sample() should always consume the 'skb'.
>>>>     * Otherwise, sample() should keep 'skb' intact regardless what
>>>>     * actions are executed within sample().
>>>> @@ -1033,16 +1059,17 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>>    		  struct sw_flow_key *key, const struct nlattr *attr,
>>>>    		  bool last)
>>>>    {
>>>> -	struct nlattr *actions;
>>>> +	const struct sample_arg *arg;
>>>>    	struct nlattr *sample_arg;
>>>>    	int rem = nla_len(attr);
>>>> -	const struct sample_arg *arg;
>>>> +	struct nlattr *next;
>>>>    	bool clone_flow_key;
>>>> +	int ret;
>>>>    
>>>>    	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>>>    	sample_arg = nla_data(attr);
>>>>    	arg = nla_data(sample_arg);
>>>> -	actions = nla_next(sample_arg, &rem);
>>>> +	next = nla_next(sample_arg, &rem);
>>>>    
>>>>    	if ((arg->probability != U32_MAX) &&
>>>>    	    (!arg->probability || get_random_u32() > arg->probability)) {
>>>> @@ -1051,9 +1078,22 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>>    		return 0;
>>>>    	}
>>>>    
>>>> -	clone_flow_key = !arg->exec;
>>>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>>>> -			     clone_flow_key);
>>>> +	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {
>>>
>>> Maybe add a commnet that OVS_SAMPLE_ATTR_PSAMPLE is always a sencond
>>> argument when present.
>>>
>>> Is there a better way to handle this?
>>>
>>
>> I also dislike it. The fact that actions are not nested but concatenated to
>> makes the internal representation a bit flaky.
>>
>> The alternative I considered was adding the group_id and cookie to the
>> internal-only OVS_SAMPLE_ATTR_ARG. However, now I wonder:
>> - Should we also use nested attributes instead of structs for this internal one?
>>
>> And, probably off-topic: what's the story behind using netlink attributes to
>> store action arguments internally? Has it ever been discussed using a union for
>> instance?
> 
> I'm not sure why it was originally done this way, but it is probably
> the most efficient of convenient ways to pack a tree-like structure.
> If we had a union, we would likely need a tree of actions with each
> action bing a separately allocated node, since we have clone() and
> even check_pkt_len() or other actions that can fork the pipeline.
> Or we'll need to use some dummy actions as parethesis, prectically
> emulating what we already have with netlink, but with structures.
> 
> Netlink format is fast to scan, since it's in a single liner chunk
> of memory.  Tree or list structure may have higher memory footprint
> and be slower to iterate due to cache misses.
> 

That's true. Iteration over a number of actions is easy in netlink. And nesting 
easily enables tree-like actions. But in the context of:

 > Structures are not a good approach for modern netlink. use nested attributes 
instead.

If we try to implement all our actions following this way, and we keep just 
copying the incoming actions into the internal representation, we incur in 
unnecessary memory overhead (e.g: storing 2x struct nlaattr + padding of extra 
memory to store 2 integers).

I don't want to derail the discussion into historical or futuristic changes, 
just saying that the approach taken in the SAMPLE action (not including this 
patch) of exposing arguments as attributes but having a kernel-only struct to 
store them seems to me a good compromise.


> There might be a better way to store all this for sure, but will
> require some careful performance and memory consumption testing.
> 
>>
>>>> +		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
>>>> +					 arg->probability);
>>>> +		if (last)
>>>> +			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>>> +		if (ret)
>>>> +			return ret;
>>>> +		next = nla_next(next, &rem);
>>>> +	}
>>>> +
>>>> +	if (nla_ok(next, rem)) {
>>>> +		clone_flow_key = !arg->exec;
>>>> +		ret = clone_execute(dp, skb, key, 0, next, rem, last,
>>>> +				    clone_flow_key);
>>>> +	}
>>>> +	return ret;
>>>>    }
>>>>    
>>>>    /* When 'last' is true, clone() should always consume the 'skb'.
>>>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>>>> index 99d72543abd3..b5b560c2e74b 100644
>>>> --- a/net/openvswitch/datapath.c
>>>> +++ b/net/openvswitch/datapath.c
>>>> @@ -976,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>    	struct sw_flow_match match;
>>>>    	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>>>>    	int error;
>>>> -	bool log = !a[OVS_FLOW_ATTR_PROBE];
>>>> +	bool log = true;
>>>
>>> Debugging artifact?
>>>
>>
>> Yep, sorry.
>>
>>>>    
>>>>    	/* Must have key and actions. */
>>>>    	error = -EINVAL;
>>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>>> index f224d9bcea5e..f540686271b7 100644
>>>> --- a/net/openvswitch/flow_netlink.c
>>>> +++ b/net/openvswitch/flow_netlink.c
>>>> @@ -2381,8 +2381,12 @@ static void ovs_nla_free_sample_action(const struct nlattr *action)
>>>>    
>>>>    	switch (nla_type(a)) {
>>>>    	case OVS_SAMPLE_ATTR_ARG:
>>>> -		/* The real list of actions follows this attribute. */
>>>
>>> Please, don't remove this comment.  Maybe extend it instead.
>>>
>>>>    		a = nla_next(a, &rem);
>>>> +
>>>> +		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
>>>> +		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
>>>> +			a = nla_next(a, &rem);
>>>> +
>>>>    		ovs_nla_free_nested_actions(a, rem);
>>>>    		break;
>>>>    	}
>>>> @@ -2561,6 +2565,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>>    				  u32 mpls_label_count, bool log,
>>>>    				  u32 depth);
>>>>    
>>>> +static int copy_action(const struct nlattr *from,
>>>> +		       struct sw_flow_actions **sfa, bool log);
>>>> +
>>>>    static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>    				    const struct sw_flow_key *key,
>>>>    				    struct sw_flow_actions **sfa,
>>>> @@ -2569,10 +2576,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>    				    u32 depth)
>>>>    {
>>>>    	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>>> -	const struct nlattr *probability, *actions;
>>>> +	const struct nlattr *probability, *actions, *psample;
>>>>    	const struct nlattr *a;
>>>> -	int rem, start, err;
>>>>    	struct sample_arg arg;
>>>> +	int rem, start, err;
>>>>    
>>>>    	memset(attrs, 0, sizeof(attrs));
>>>>    	nla_for_each_nested(a, attr, rem) {
>>>> @@ -2589,7 +2596,23 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>    		return -EINVAL;
>>>>    
>>>>    	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>>> +		return -EINVAL;
>>>> +
>>>> +	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
>>>> +	if (psample) {
>>>> +		struct ovs_psample *ovs_ps;
>>>> +
>>>> +		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
>>>> +			return -EINVAL;
>>>> +
>>>> +		ovs_ps = nla_data(psample);
>>>> +		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
>>>> +		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
>>>> +			return -EINVAL;
>>>> +	}
>>>> +
>>>> +	if (!psample && !actions)
>>>>    		return -EINVAL;
>>>>    
>>>>    	/* validation done, copy sample action. */
>>>> @@ -2608,7 +2631,9 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>    	 * If the sample is the last action, it can always be excuted
>>>>    	 * rather than deferred.
>>>>    	 */
>>>> -	arg.exec = last || !actions_may_change_flow(actions);
>>>> +	if (actions)
>>>> +		arg.exec = last || !actions_may_change_flow(actions);
>>>
>>> 'arg.exec' will remain uninitialized.
>>>
>>
>> Yes. I just wanted to avoid the call to actions_may_change_flow(NULL) in which
>> case arg.exec is not used. I'll make sure to zero-initialize it nevertheless.
>>
>>>> +
>>>>    	arg.probability = nla_get_u32(probability);
>>>>    
>>>>    	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>>> @@ -2616,10 +2641,17 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>    	if (err)
>>>>    		return err;
>>>>    
>>>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>>> -				     eth_type, vlan_tci, mpls_label_count, log,
>>>> -				     depth + 1);
>>>> +	if (psample)
>>>> +		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
>>>> +					 nla_data(psample), nla_len(psample),
>>>> +					 log);
>>>> +	if (err)
>>>
>>> Can be used uninitialized.
>>>
>>
>> You're right, it should be inside the if above, although err should have been
>> initialized to output of ovs_nla_add_action.
> 
> Ah, I missed the previous assingnent.  But yes, it is still a little strange
> to handle errors this way.
> 
>>
>>>> +		return err;
>>>>    
>>>> +	if (actions)
>>>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>>> +					     eth_type, vlan_tci,
>>>> +					     mpls_label_count, log, depth + 1);
>>>>    	if (err)
>>>>    		return err;
>>>>    
>>>> @@ -3538,7 +3570,7 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>>    	struct nlattr *start, *ac_start = NULL, *sample_arg;
>>>>    	int err = 0, rem = nla_len(attr);
>>>>    	const struct sample_arg *arg;
>>>> -	struct nlattr *actions;
>>>> +	struct nlattr *next;
>>>>    
>>>>    	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
>>>>    	if (!start)
>>>> @@ -3546,27 +3578,39 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>>    
>>>>    	sample_arg = nla_data(attr);
>>>>    	arg = nla_data(sample_arg);
>>>> -	actions = nla_next(sample_arg, &rem);
>>>> +	next = nla_next(sample_arg, &rem);
>>>>    
>>>>    	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
>>>>    		err = -EMSGSIZE;
>>>>    		goto out;
>>>>    	}
>>>>    
>>>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>>> -	if (!ac_start) {
>>>> -		err = -EMSGSIZE;
>>>> -		goto out;
>>>> +	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
>>>> +		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
>>>> +			    nla_data(next))) {
>>>> +			err = -EMSGSIZE;
>>>> +			goto out;
>>>> +		}
>>>> +		next = nla_next(next, &rem);
>>>>    	}
>>>>    
>>>> -	err = ovs_nla_put_actions(actions, rem, skb);
>>>> +	if (nla_ok(next, rem)) {
>>>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>>> +		if (!ac_start) {
>>>> +			err = -EMSGSIZE;
>>>> +			goto out;
>>>> +		}
>>>> +		err = ovs_nla_put_actions(next, rem, skb);
>>>> +	}
>>>>    
>>>>    out:
>>>>    	if (err) {
>>>> -		nla_nest_cancel(skb, ac_start);
>>>> +		if (ac_start)
>>>> +			nla_nest_cancel(skb, ac_start);
>>>>    		nla_nest_cancel(skb, start);
>>>>    	} else {
>>>> -		nla_nest_end(skb, ac_start);
>>>> +		if (ac_start)
>>>> +			nla_nest_end(skb, ac_start);
>>>>    		nla_nest_end(skb, start);
>>>>    	}
>>>>    
>>>
>>
>
Ilya Maximets April 9, 2024, 9:35 a.m. UTC | #5
On 4/9/24 10:16, Adrian Moreno wrote:
> 
> 
> On 4/8/24 22:40, Ilya Maximets wrote:
>> On 4/8/24 21:48, Adrian Moreno wrote:
>>>
>>>
>>> On 4/8/24 15:37, Ilya Maximets wrote:
>>>> On 4/8/24 14:57, Adrian Moreno wrote:
>>>>> Add a new attribute to the sample action, called
>>>>> OVS_SAMPLE_ATTR_PSAMPLE to allow userspace to pass a group_id and a
>>>>> user-defined cookie.
>>>>>
>>>>> The maximum length of the user-defined cookie is set to 16, same as
>>>>> tc_cookie to discourage using cookies that will not be offloadable.
>>>>>
>>>>> When set, the sample action will use psample to multicast the sample.
>>>>>
>>>>> Signed-off-by: Adrian Moreno <amorenoz@redhat.com>
>>>>> ---
>>>>>    include/uapi/linux/openvswitch.h | 22 +++++++--
>>>>>    net/openvswitch/actions.c        | 52 ++++++++++++++++++---
>>>>>    net/openvswitch/datapath.c       |  2 +-
>>>>>    net/openvswitch/flow_netlink.c   | 78 +++++++++++++++++++++++++-------
>>>>>    4 files changed, 127 insertions(+), 27 deletions(-)
>>>>
>>>> This cpatch is missing a few bits:
>>>>    - Update for Documentation/netlink/specs/ovs_flow.yaml
>>>>    - Maybe update for tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>>>>    - Maybe some basic selftests.
>>>>
>>>
>>> Absolutely. I surely plan to add it on the first non-rfc version.
>>>
>>>>>
>>>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>>>> index efc82c318fa2..a5a32588f582 100644
>>>>> --- a/include/uapi/linux/openvswitch.h
>>>>> +++ b/include/uapi/linux/openvswitch.h
>>>>> @@ -646,15 +646,24 @@ enum ovs_flow_attr {
>>>>>     * %UINT32_MAX samples all packets and intermediate values sample intermediate
>>>>>     * fractions of packets.
>>>>>     * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
>>>>> - * Actions are passed as nested attributes.
>>>>> + * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
>>>>> + * is not set.
>>>>
>>>> 'is set' probably. >
>>>>> + * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
>>>>> + * OVS_SAMPLE_ATTR_ACTIONS is not set.
>>>>
>>>> Same here.
>>>>
>>>
>>> Good catch, I rewrote those comments a bunch of times and the were left
>>> expressing the exact opposite!
>>>
>>>
>>>>>     *
>>>>> - * Executes the specified actions with the given probability on a per-packet
>>>>> - * basis.
>>>>> + * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
>>>>> + * specified.
>>>>> + *
>>>>> + * Executes the specified actions and/or sends the packet to psample
>>>>> + * with the given probability on a per-packet basis.
>>>>>     */
>>>>>    enum ovs_sample_attr {
>>>>>    	OVS_SAMPLE_ATTR_UNSPEC,
>>>>>    	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
>>>>>    	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
>>>>> +	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
>>>>> +				      * by the user-provided cookie.
>>>>> +				      */
>>>>>    	__OVS_SAMPLE_ATTR_MAX,
>>>>>    
>>>>>    #ifdef __KERNEL__
>>>>> @@ -675,6 +684,13 @@ struct sample_arg {
>>>>>    };
>>>>>    #endif
>>>>>    
>>>>> +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
>>>>> +struct ovs_psample {
>>>>> +	__u32 group_id;		/* The group used for packet sampling. */
>>>>> +	__u32 user_cookie_len;	/* The length of the user-provided cookie. */
>>
>> Here as well, not sure if u32 makes sense as userspace can't
>> actually supply a cookie this large via netlink.
>>
>>>>> +	__u8 user_cookie[];	/* The user-provided cookie. */
>>>>> +};
>>>>
>>>> Structures are not a good approach for modern netlink.
>>>> use nested attributes instead.  This way we can also
>>>> eliminate the need for variable-length array and the
>>>> length field, if the length can be taken from a netlink
>>>> attribute directly, e.g. similar to NLA_BINARY in tc.
>>>>
>>>> If necessary, there could be a structure in the private
>>>> header to store the data for internal use.
>>>>
>>>
>>> Interesting. Thanks for the suggestion. I think it will also help action validation.
>>>
>>>>> +
>>>>>    /**
>>>>>     * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
>>>>>     * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
>>>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>>>> index 6fcd7e2ca81f..45d2b325b76a 100644
>>>>> --- a/net/openvswitch/actions.c
>>>>> +++ b/net/openvswitch/actions.c
>>>>> @@ -24,6 +24,7 @@
>>>>>    #include <net/checksum.h>
>>>>>    #include <net/dsfield.h>
>>>>>    #include <net/mpls.h>
>>>>> +#include <net/psample.h>
>>>>>    #include <net/sctp/checksum.h>
>>>>>    
>>>>>    #include "datapath.h"
>>>>> @@ -1025,6 +1026,31 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
>>>>>    	return 0;
>>>>>    }
>>>>>    
>>>>> +static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
>>>>> +			      struct ovs_psample *psample, struct sk_buff *skb,
>>>>> +			      u32 rate)
>>>>> +{
>>>>> +	struct psample_group psample_group = {};
>>>>> +	struct psample_metadata md = {};
>>>>> +	struct vport *input_vport;
>>>>> +
>>>>> +	psample_group.group_num = psample->group_id;
>>>>> +	psample_group.net = ovs_dp_get_net(dp);
>>>>> +
>>>>> +	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
>>>>> +	if (!input_vport)
>>>>> +		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
>>>>> +
>>>>> +	md.in_ifindex = input_vport->dev->ifindex;
>>>>> +	md.user_cookie = psample->user_cookie;
>>>>> +	md.user_cookie_len = psample->user_cookie_len;
>>>>> +	md.trunc_size = skb->len;
>>>>> +
>>>>> +	psample_sample_packet(&psample_group, skb, rate, &md);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>>    /* When 'last' is true, sample() should always consume the 'skb'.
>>>>>     * Otherwise, sample() should keep 'skb' intact regardless what
>>>>>     * actions are executed within sample().
>>>>> @@ -1033,16 +1059,17 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>>>    		  struct sw_flow_key *key, const struct nlattr *attr,
>>>>>    		  bool last)
>>>>>    {
>>>>> -	struct nlattr *actions;
>>>>> +	const struct sample_arg *arg;
>>>>>    	struct nlattr *sample_arg;
>>>>>    	int rem = nla_len(attr);
>>>>> -	const struct sample_arg *arg;
>>>>> +	struct nlattr *next;
>>>>>    	bool clone_flow_key;
>>>>> +	int ret;
>>>>>    
>>>>>    	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>>>>>    	sample_arg = nla_data(attr);
>>>>>    	arg = nla_data(sample_arg);
>>>>> -	actions = nla_next(sample_arg, &rem);
>>>>> +	next = nla_next(sample_arg, &rem);
>>>>>    
>>>>>    	if ((arg->probability != U32_MAX) &&
>>>>>    	    (!arg->probability || get_random_u32() > arg->probability)) {
>>>>> @@ -1051,9 +1078,22 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
>>>>>    		return 0;
>>>>>    	}
>>>>>    
>>>>> -	clone_flow_key = !arg->exec;
>>>>> -	return clone_execute(dp, skb, key, 0, actions, rem, last,
>>>>> -			     clone_flow_key);
>>>>> +	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {
>>>>
>>>> Maybe add a commnet that OVS_SAMPLE_ATTR_PSAMPLE is always a sencond
>>>> argument when present.
>>>>
>>>> Is there a better way to handle this?
>>>>
>>>
>>> I also dislike it. The fact that actions are not nested but concatenated to
>>> makes the internal representation a bit flaky.
>>>
>>> The alternative I considered was adding the group_id and cookie to the
>>> internal-only OVS_SAMPLE_ATTR_ARG. However, now I wonder:
>>> - Should we also use nested attributes instead of structs for this internal one?
>>>
>>> And, probably off-topic: what's the story behind using netlink attributes to
>>> store action arguments internally? Has it ever been discussed using a union for
>>> instance?
>>
>> I'm not sure why it was originally done this way, but it is probably
>> the most efficient of convenient ways to pack a tree-like structure.
>> If we had a union, we would likely need a tree of actions with each
>> action bing a separately allocated node, since we have clone() and
>> even check_pkt_len() or other actions that can fork the pipeline.
>> Or we'll need to use some dummy actions as parethesis, prectically
>> emulating what we already have with netlink, but with structures.
>>
>> Netlink format is fast to scan, since it's in a single liner chunk
>> of memory.  Tree or list structure may have higher memory footprint
>> and be slower to iterate due to cache misses.
>>
> 
> That's true. Iteration over a number of actions is easy in netlink. And nesting 
> easily enables tree-like actions. But in the context of:
> 
>  > Structures are not a good approach for modern netlink. use nested attributes 
> instead.
> 
> If we try to implement all our actions following this way, and we keep just 
> copying the incoming actions into the internal representation, we incur in 
> unnecessary memory overhead (e.g: storing 2x struct nlaattr + padding of extra 
> memory to store 2 integers).
> 
> I don't want to derail the discussion into historical or futuristic changes, 
> just saying that the approach taken in the SAMPLE action (not including this 
> patch) of exposing arguments as attributes but having a kernel-only struct to 
> store them seems to me a good compromise.

Sure.  As I said, it's fine to have internal structures.  My comment
was mainly about uAPI part.  We should avoid structures in uAPI if
possible, as they are very hard to maintain and keep compatible with
older userspace in case some changes will be needed in the future.

> 
> 
>> There might be a better way to store all this for sure, but will
>> require some careful performance and memory consumption testing.
>>
>>>
>>>>> +		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
>>>>> +					 arg->probability);
>>>>> +		if (last)
>>>>> +			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
>>>>> +		if (ret)
>>>>> +			return ret;
>>>>> +		next = nla_next(next, &rem);
>>>>> +	}
>>>>> +
>>>>> +	if (nla_ok(next, rem)) {
>>>>> +		clone_flow_key = !arg->exec;
>>>>> +		ret = clone_execute(dp, skb, key, 0, next, rem, last,
>>>>> +				    clone_flow_key);
>>>>> +	}
>>>>> +	return ret;
>>>>>    }
>>>>>    
>>>>>    /* When 'last' is true, clone() should always consume the 'skb'.
>>>>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>>>>> index 99d72543abd3..b5b560c2e74b 100644
>>>>> --- a/net/openvswitch/datapath.c
>>>>> +++ b/net/openvswitch/datapath.c
>>>>> @@ -976,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>>    	struct sw_flow_match match;
>>>>>    	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>>>>>    	int error;
>>>>> -	bool log = !a[OVS_FLOW_ATTR_PROBE];
>>>>> +	bool log = true;
>>>>
>>>> Debugging artifact?
>>>>
>>>
>>> Yep, sorry.
>>>
>>>>>    
>>>>>    	/* Must have key and actions. */
>>>>>    	error = -EINVAL;
>>>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>>>> index f224d9bcea5e..f540686271b7 100644
>>>>> --- a/net/openvswitch/flow_netlink.c
>>>>> +++ b/net/openvswitch/flow_netlink.c
>>>>> @@ -2381,8 +2381,12 @@ static void ovs_nla_free_sample_action(const struct nlattr *action)
>>>>>    
>>>>>    	switch (nla_type(a)) {
>>>>>    	case OVS_SAMPLE_ATTR_ARG:
>>>>> -		/* The real list of actions follows this attribute. */
>>>>
>>>> Please, don't remove this comment.  Maybe extend it instead.
>>>>
>>>>>    		a = nla_next(a, &rem);
>>>>> +
>>>>> +		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
>>>>> +		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
>>>>> +			a = nla_next(a, &rem);
>>>>> +
>>>>>    		ovs_nla_free_nested_actions(a, rem);
>>>>>    		break;
>>>>>    	}
>>>>> @@ -2561,6 +2565,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>>>    				  u32 mpls_label_count, bool log,
>>>>>    				  u32 depth);
>>>>>    
>>>>> +static int copy_action(const struct nlattr *from,
>>>>> +		       struct sw_flow_actions **sfa, bool log);
>>>>> +
>>>>>    static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>>    				    const struct sw_flow_key *key,
>>>>>    				    struct sw_flow_actions **sfa,
>>>>> @@ -2569,10 +2576,10 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>>    				    u32 depth)
>>>>>    {
>>>>>    	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
>>>>> -	const struct nlattr *probability, *actions;
>>>>> +	const struct nlattr *probability, *actions, *psample;
>>>>>    	const struct nlattr *a;
>>>>> -	int rem, start, err;
>>>>>    	struct sample_arg arg;
>>>>> +	int rem, start, err;
>>>>>    
>>>>>    	memset(attrs, 0, sizeof(attrs));
>>>>>    	nla_for_each_nested(a, attr, rem) {
>>>>> @@ -2589,7 +2596,23 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>>    		return -EINVAL;
>>>>>    
>>>>>    	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
>>>>> -	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
>>>>> +	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
>>>>> +	if (psample) {
>>>>> +		struct ovs_psample *ovs_ps;
>>>>> +
>>>>> +		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
>>>>> +			return -EINVAL;
>>>>> +
>>>>> +		ovs_ps = nla_data(psample);
>>>>> +		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
>>>>> +		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
>>>>> +			return -EINVAL;
>>>>> +	}
>>>>> +
>>>>> +	if (!psample && !actions)
>>>>>    		return -EINVAL;
>>>>>    
>>>>>    	/* validation done, copy sample action. */
>>>>> @@ -2608,7 +2631,9 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>>    	 * If the sample is the last action, it can always be excuted
>>>>>    	 * rather than deferred.
>>>>>    	 */
>>>>> -	arg.exec = last || !actions_may_change_flow(actions);
>>>>> +	if (actions)
>>>>> +		arg.exec = last || !actions_may_change_flow(actions);
>>>>
>>>> 'arg.exec' will remain uninitialized.
>>>>
>>>
>>> Yes. I just wanted to avoid the call to actions_may_change_flow(NULL) in which
>>> case arg.exec is not used. I'll make sure to zero-initialize it nevertheless.
>>>
>>>>> +
>>>>>    	arg.probability = nla_get_u32(probability);
>>>>>    
>>>>>    	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
>>>>> @@ -2616,10 +2641,17 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
>>>>>    	if (err)
>>>>>    		return err;
>>>>>    
>>>>> -	err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>>>> -				     eth_type, vlan_tci, mpls_label_count, log,
>>>>> -				     depth + 1);
>>>>> +	if (psample)
>>>>> +		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
>>>>> +					 nla_data(psample), nla_len(psample),
>>>>> +					 log);
>>>>> +	if (err)
>>>>
>>>> Can be used uninitialized.
>>>>
>>>
>>> You're right, it should be inside the if above, although err should have been
>>> initialized to output of ovs_nla_add_action.
>>
>> Ah, I missed the previous assingnent.  But yes, it is still a little strange
>> to handle errors this way.
>>
>>>
>>>>> +		return err;
>>>>>    
>>>>> +	if (actions)
>>>>> +		err = __ovs_nla_copy_actions(net, actions, key, sfa,
>>>>> +					     eth_type, vlan_tci,
>>>>> +					     mpls_label_count, log, depth + 1);
>>>>>    	if (err)
>>>>>    		return err;
>>>>>    
>>>>> @@ -3538,7 +3570,7 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>>>    	struct nlattr *start, *ac_start = NULL, *sample_arg;
>>>>>    	int err = 0, rem = nla_len(attr);
>>>>>    	const struct sample_arg *arg;
>>>>> -	struct nlattr *actions;
>>>>> +	struct nlattr *next;
>>>>>    
>>>>>    	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
>>>>>    	if (!start)
>>>>> @@ -3546,27 +3578,39 @@ static int sample_action_to_attr(const struct nlattr *attr,
>>>>>    
>>>>>    	sample_arg = nla_data(attr);
>>>>>    	arg = nla_data(sample_arg);
>>>>> -	actions = nla_next(sample_arg, &rem);
>>>>> +	next = nla_next(sample_arg, &rem);
>>>>>    
>>>>>    	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
>>>>>    		err = -EMSGSIZE;
>>>>>    		goto out;
>>>>>    	}
>>>>>    
>>>>> -	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>>>> -	if (!ac_start) {
>>>>> -		err = -EMSGSIZE;
>>>>> -		goto out;
>>>>> +	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
>>>>> +		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
>>>>> +			    nla_data(next))) {
>>>>> +			err = -EMSGSIZE;
>>>>> +			goto out;
>>>>> +		}
>>>>> +		next = nla_next(next, &rem);
>>>>>    	}
>>>>>    
>>>>> -	err = ovs_nla_put_actions(actions, rem, skb);
>>>>> +	if (nla_ok(next, rem)) {
>>>>> +		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
>>>>> +		if (!ac_start) {
>>>>> +			err = -EMSGSIZE;
>>>>> +			goto out;
>>>>> +		}
>>>>> +		err = ovs_nla_put_actions(next, rem, skb);
>>>>> +	}
>>>>>    
>>>>>    out:
>>>>>    	if (err) {
>>>>> -		nla_nest_cancel(skb, ac_start);
>>>>> +		if (ac_start)
>>>>> +			nla_nest_cancel(skb, ac_start);
>>>>>    		nla_nest_cancel(skb, start);
>>>>>    	} else {
>>>>> -		nla_nest_end(skb, ac_start);
>>>>> +		if (ac_start)
>>>>> +			nla_nest_end(skb, ac_start);
>>>>>    		nla_nest_end(skb, start);
>>>>>    	}
>>>>>    
>>>>
>>>
>>
>
Jakub Kicinski April 9, 2024, 9:49 p.m. UTC | #6
On Tue, 9 Apr 2024 11:35:04 +0200 Ilya Maximets wrote:
> > If we try to implement all our actions following this way, and we keep just 
> > copying the incoming actions into the internal representation, we incur in 
> > unnecessary memory overhead (e.g: storing 2x struct nlaattr + padding of extra 
> > memory to store 2 integers).
> > 
> > I don't want to derail the discussion into historical or futuristic changes, 
> > just saying that the approach taken in the SAMPLE action (not including this 
> > patch) of exposing arguments as attributes but having a kernel-only struct to 
> > store them seems to me a good compromise.  
> 
> Sure.  As I said, it's fine to have internal structures.  My comment
> was mainly about uAPI part.  We should avoid structures in uAPI if
> possible, as they are very hard to maintain and keep compatible with
> older userspace in case some changes will be needed in the future.

FWIW there are some YAML specs for ovs under
Documentation/netlink/specs/ovs_*
perhaps they should also be updated?
Adrian Moreno April 10, 2024, 1:44 p.m. UTC | #7
On 4/9/24 23:49, Jakub Kicinski wrote:
> On Tue, 9 Apr 2024 11:35:04 +0200 Ilya Maximets wrote:
>>> If we try to implement all our actions following this way, and we keep just
>>> copying the incoming actions into the internal representation, we incur in
>>> unnecessary memory overhead (e.g: storing 2x struct nlaattr + padding of extra
>>> memory to store 2 integers).
>>>
>>> I don't want to derail the discussion into historical or futuristic changes,
>>> just saying that the approach taken in the SAMPLE action (not including this
>>> patch) of exposing arguments as attributes but having a kernel-only struct to
>>> store them seems to me a good compromise.
>>
>> Sure.  As I said, it's fine to have internal structures.  My comment
>> was mainly about uAPI part.  We should avoid structures in uAPI if
>> possible, as they are very hard to maintain and keep compatible with
>> older userspace in case some changes will be needed in the future.
> 
> FWIW there are some YAML specs for ovs under
> Documentation/netlink/specs/ovs_*
> perhaps they should also be updated?
> 

Sure, I will update them in the next version.
Thanks.
diff mbox series

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index efc82c318fa2..a5a32588f582 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -646,15 +646,24 @@  enum ovs_flow_attr {
  * %UINT32_MAX samples all packets and intermediate values sample intermediate
  * fractions of packets.
  * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
- * Actions are passed as nested attributes.
+ * Actions are passed as nested attributes. Optional if OVS_SAMPLE_ATTR_PSAMPLE
+ * is not set.
+ * @OVS_SAMPLE_ATTR_PSAMPLE: Arguments to be passed to psample. Optional if
+ * OVS_SAMPLE_ATTR_ACTIONS is not set.
  *
- * Executes the specified actions with the given probability on a per-packet
- * basis.
+ * Either OVS_SAMPLE_ATTR_USER_COOKIE or OVS_SAMPLE_ATTR_USER_COOKIE must be
+ * specified.
+ *
+ * Executes the specified actions and/or sends the packet to psample
+ * with the given probability on a per-packet basis.
  */
 enum ovs_sample_attr {
 	OVS_SAMPLE_ATTR_UNSPEC,
 	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
 	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_SAMPLE_ATTR_PSAMPLE,     /* struct ovs_psample followed
+				      * by the user-provided cookie.
+				      */
 	__OVS_SAMPLE_ATTR_MAX,
 
 #ifdef __KERNEL__
@@ -675,6 +684,13 @@  struct sample_arg {
 };
 #endif
 
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
+struct ovs_psample {
+	__u32 group_id;		/* The group used for packet sampling. */
+	__u32 user_cookie_len;	/* The length of the user-provided cookie. */
+	__u8 user_cookie[];	/* The user-provided cookie. */
+};
+
 /**
  * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
  * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 6fcd7e2ca81f..45d2b325b76a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -24,6 +24,7 @@ 
 #include <net/checksum.h>
 #include <net/dsfield.h>
 #include <net/mpls.h>
+#include <net/psample.h>
 #include <net/sctp/checksum.h>
 
 #include "datapath.h"
@@ -1025,6 +1026,31 @@  static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
 	return 0;
 }
 
+static int ovs_psample_packet(struct datapath *dp, struct sw_flow_key *key,
+			      struct ovs_psample *psample, struct sk_buff *skb,
+			      u32 rate)
+{
+	struct psample_group psample_group = {};
+	struct psample_metadata md = {};
+	struct vport *input_vport;
+
+	psample_group.group_num = psample->group_id;
+	psample_group.net = ovs_dp_get_net(dp);
+
+	input_vport = ovs_vport_rcu(dp, key->phy.in_port);
+	if (!input_vport)
+		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
+
+	md.in_ifindex = input_vport->dev->ifindex;
+	md.user_cookie = psample->user_cookie;
+	md.user_cookie_len = psample->user_cookie_len;
+	md.trunc_size = skb->len;
+
+	psample_sample_packet(&psample_group, skb, rate, &md);
+
+	return 0;
+}
+
 /* When 'last' is true, sample() should always consume the 'skb'.
  * Otherwise, sample() should keep 'skb' intact regardless what
  * actions are executed within sample().
@@ -1033,16 +1059,17 @@  static int sample(struct datapath *dp, struct sk_buff *skb,
 		  struct sw_flow_key *key, const struct nlattr *attr,
 		  bool last)
 {
-	struct nlattr *actions;
+	const struct sample_arg *arg;
 	struct nlattr *sample_arg;
 	int rem = nla_len(attr);
-	const struct sample_arg *arg;
+	struct nlattr *next;
 	bool clone_flow_key;
+	int ret;
 
 	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
 	sample_arg = nla_data(attr);
 	arg = nla_data(sample_arg);
-	actions = nla_next(sample_arg, &rem);
+	next = nla_next(sample_arg, &rem);
 
 	if ((arg->probability != U32_MAX) &&
 	    (!arg->probability || get_random_u32() > arg->probability)) {
@@ -1051,9 +1078,22 @@  static int sample(struct datapath *dp, struct sk_buff *skb,
 		return 0;
 	}
 
-	clone_flow_key = !arg->exec;
-	return clone_execute(dp, skb, key, 0, actions, rem, last,
-			     clone_flow_key);
+	if (next->nla_type == OVS_SAMPLE_ATTR_PSAMPLE) {
+		ret = ovs_psample_packet(dp, key, nla_data(next), skb,
+					 arg->probability);
+		if (last)
+			ovs_kfree_skb_reason(skb, OVS_DROP_LAST_ACTION);
+		if (ret)
+			return ret;
+		next = nla_next(next, &rem);
+	}
+
+	if (nla_ok(next, rem)) {
+		clone_flow_key = !arg->exec;
+		ret = clone_execute(dp, skb, key, 0, next, rem, last,
+				    clone_flow_key);
+	}
+	return ret;
 }
 
 /* When 'last' is true, clone() should always consume the 'skb'.
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 99d72543abd3..b5b560c2e74b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -976,7 +976,7 @@  static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_match match;
 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
 	int error;
-	bool log = !a[OVS_FLOW_ATTR_PROBE];
+	bool log = true;
 
 	/* Must have key and actions. */
 	error = -EINVAL;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index f224d9bcea5e..f540686271b7 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2381,8 +2381,12 @@  static void ovs_nla_free_sample_action(const struct nlattr *action)
 
 	switch (nla_type(a)) {
 	case OVS_SAMPLE_ATTR_ARG:
-		/* The real list of actions follows this attribute. */
 		a = nla_next(a, &rem);
+
+		/* OVS_SAMPLE_ATTR_PSAMPLE may be present. */
+		if (nla_type(a) == OVS_SAMPLE_ATTR_PSAMPLE)
+			a = nla_next(a, &rem);
+
 		ovs_nla_free_nested_actions(a, rem);
 		break;
 	}
@@ -2561,6 +2565,9 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 				  u32 mpls_label_count, bool log,
 				  u32 depth);
 
+static int copy_action(const struct nlattr *from,
+		       struct sw_flow_actions **sfa, bool log);
+
 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 				    const struct sw_flow_key *key,
 				    struct sw_flow_actions **sfa,
@@ -2569,10 +2576,10 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 				    u32 depth)
 {
 	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
-	const struct nlattr *probability, *actions;
+	const struct nlattr *probability, *actions, *psample;
 	const struct nlattr *a;
-	int rem, start, err;
 	struct sample_arg arg;
+	int rem, start, err;
 
 	memset(attrs, 0, sizeof(attrs));
 	nla_for_each_nested(a, attr, rem) {
@@ -2589,7 +2596,23 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 		return -EINVAL;
 
 	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
-	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+	if (actions && (!nla_len(actions) || nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+
+	psample = attrs[OVS_SAMPLE_ATTR_PSAMPLE];
+	if (psample) {
+		struct ovs_psample *ovs_ps;
+
+		if (!nla_len(psample) || nla_len(psample) < sizeof(*ovs_ps))
+			return -EINVAL;
+
+		ovs_ps = nla_data(psample);
+		if (ovs_ps->user_cookie_len > OVS_PSAMPLE_COOKIE_MAX_SIZE ||
+		    nla_len(psample) != sizeof(*ovs_ps) + ovs_ps->user_cookie_len)
+			return -EINVAL;
+	}
+
+	if (!psample && !actions)
 		return -EINVAL;
 
 	/* validation done, copy sample action. */
@@ -2608,7 +2631,9 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	 * If the sample is the last action, it can always be excuted
 	 * rather than deferred.
 	 */
-	arg.exec = last || !actions_may_change_flow(actions);
+	if (actions)
+		arg.exec = last || !actions_may_change_flow(actions);
+
 	arg.probability = nla_get_u32(probability);
 
 	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
@@ -2616,10 +2641,17 @@  static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
 	if (err)
 		return err;
 
-	err = __ovs_nla_copy_actions(net, actions, key, sfa,
-				     eth_type, vlan_tci, mpls_label_count, log,
-				     depth + 1);
+	if (psample)
+		err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PSAMPLE,
+					 nla_data(psample), nla_len(psample),
+					 log);
+	if (err)
+		return err;
 
+	if (actions)
+		err = __ovs_nla_copy_actions(net, actions, key, sfa,
+					     eth_type, vlan_tci,
+					     mpls_label_count, log, depth + 1);
 	if (err)
 		return err;
 
@@ -3538,7 +3570,7 @@  static int sample_action_to_attr(const struct nlattr *attr,
 	struct nlattr *start, *ac_start = NULL, *sample_arg;
 	int err = 0, rem = nla_len(attr);
 	const struct sample_arg *arg;
-	struct nlattr *actions;
+	struct nlattr *next;
 
 	start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE);
 	if (!start)
@@ -3546,27 +3578,39 @@  static int sample_action_to_attr(const struct nlattr *attr,
 
 	sample_arg = nla_data(attr);
 	arg = nla_data(sample_arg);
-	actions = nla_next(sample_arg, &rem);
+	next = nla_next(sample_arg, &rem);
 
 	if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
 		err = -EMSGSIZE;
 		goto out;
 	}
 
-	ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
-	if (!ac_start) {
-		err = -EMSGSIZE;
-		goto out;
+	if (nla_type(next) == OVS_SAMPLE_ATTR_PSAMPLE) {
+		if (nla_put(skb, OVS_SAMPLE_ATTR_PSAMPLE, nla_len(next),
+			    nla_data(next))) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+		next = nla_next(next, &rem);
 	}
 
-	err = ovs_nla_put_actions(actions, rem, skb);
+	if (nla_ok(next, rem)) {
+		ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS);
+		if (!ac_start) {
+			err = -EMSGSIZE;
+			goto out;
+		}
+		err = ovs_nla_put_actions(next, rem, skb);
+	}
 
 out:
 	if (err) {
-		nla_nest_cancel(skb, ac_start);
+		if (ac_start)
+			nla_nest_cancel(skb, ac_start);
 		nla_nest_cancel(skb, start);
 	} else {
-		nla_nest_end(skb, ac_start);
+		if (ac_start)
+			nla_nest_end(skb, ac_start);
 		nla_nest_end(skb, start);
 	}