Message ID | 20240424135109.3524355-3-amorenoz@redhat.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: openvswitch: Add sample multicasting. | expand |
Wed, Apr 24, 2024 at 03:50:49PM CEST, amorenoz@redhat.com wrote: >Packet samples can come from several places (e.g: different tc sample >actions), typically using the sample group (PSAMPLE_ATTR_SAMPLE_GROUP) >to differentiate them. > >Likewise, sample consumers that listen on the multicast group may only >be interested on a single group. However, they are currently forced to >receive all samples and discard the ones that are not relevant, causing >unnecessary overhead. > >Allow users to filter on the desired group_id by adding a new command >PSAMPLE_SET_FILTER that can be used to pass the desired group id. >Store this filter on the per-socket private pointer and use it for >filtering multicasted samples. > >Signed-off-by: Adrian Moreno <amorenoz@redhat.com> >--- > include/uapi/linux/psample.h | 1 + > net/psample/psample.c | 110 +++++++++++++++++++++++++++++++++-- > 2 files changed, 105 insertions(+), 6 deletions(-) > >diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h >index e585db5bf2d2..9d62983af0a4 100644 >--- a/include/uapi/linux/psample.h >+++ b/include/uapi/linux/psample.h >@@ -28,6 +28,7 @@ enum psample_command { > PSAMPLE_CMD_GET_GROUP, > PSAMPLE_CMD_NEW_GROUP, > PSAMPLE_CMD_DEL_GROUP, >+ PSAMPLE_CMD_SET_FILTER, > }; > > enum psample_tunnel_key_attr { >diff --git a/net/psample/psample.c b/net/psample/psample.c >index a5d9b8446f77..f5f77515b969 100644 >--- a/net/psample/psample.c >+++ b/net/psample/psample.c >@@ -98,13 +98,77 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, > return msg->len; > } > >-static const struct genl_small_ops psample_nl_ops[] = { >+struct psample_obj_desc { >+ struct rcu_head rcu; >+ u32 group_num; >+}; >+ >+struct psample_nl_sock_priv { >+ struct psample_obj_desc __rcu *filter; >+ spinlock_t filter_lock; /* Protects filter. 
*/ >+}; >+ >+static void psample_nl_sock_priv_init(void *priv) >+{ >+ struct psample_nl_sock_priv *sk_priv = priv; >+ >+ spin_lock_init(&sk_priv->filter_lock); >+} >+ >+static void psample_nl_sock_priv_destroy(void *priv) >+{ >+ struct psample_nl_sock_priv *sk_priv = priv; >+ struct psample_obj_desc *filter; >+ >+ filter = rcu_dereference_protected(sk_priv->filter, true); >+ kfree_rcu(filter, rcu); >+} >+ >+static int psample_nl_set_filter_doit(struct sk_buff *skb, >+ struct genl_info *info) >+{ >+ struct psample_obj_desc *filter = NULL; >+ struct psample_nl_sock_priv *sk_priv; >+ struct nlattr **attrs = info->attrs; >+ >+ if (attrs[PSAMPLE_ATTR_SAMPLE_GROUP]) { >+ filter = kzalloc(sizeof(*filter), GFP_KERNEL); >+ filter->group_num = >+ nla_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]); >+ } >+ >+ sk_priv = genl_sk_priv_get(&psample_nl_family, NETLINK_CB(skb).sk); >+ if (IS_ERR(sk_priv)) { >+ kfree(filter); >+ return PTR_ERR(sk_priv); >+ } >+ >+ spin_lock(&sk_priv->filter_lock); >+ filter = rcu_replace_pointer(sk_priv->filter, filter, >+ lockdep_is_held(&sk_priv->filter_lock)); >+ spin_unlock(&sk_priv->filter_lock); >+ kfree_rcu(filter, rcu); >+ return 0; >+} >+ >+static const struct nla_policy >+psample_set_filter_policy[PSAMPLE_ATTR_SAMPLE_GROUP + 1] = { >+ [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NLA_U32, }, >+}; >+ >+static const struct genl_ops psample_nl_ops[] = { > { > .cmd = PSAMPLE_CMD_GET_GROUP, > .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, > .dumpit = psample_nl_cmd_get_group_dumpit, > /* can be retrieved by unprivileged users */ >- } >+ }, >+ { >+ .cmd = PSAMPLE_CMD_SET_FILTER, >+ .doit = psample_nl_set_filter_doit, >+ .policy = psample_set_filter_policy, >+ .flags = 0, >+ }, Sidenote: Did you think about converting psample to split ops and to introduce a ynl spec file for it? 
> }; > > static struct genl_family psample_nl_family __ro_after_init = { >@@ -114,10 +178,13 @@ static struct genl_family psample_nl_family __ro_after_init = { > .netnsok = true, > .module = THIS_MODULE, > .mcgrps = psample_nl_mcgrps, >- .small_ops = psample_nl_ops, >- .n_small_ops = ARRAY_SIZE(psample_nl_ops), >+ .ops = psample_nl_ops, >+ .n_ops = ARRAY_SIZE(psample_nl_ops), > .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1, > .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), >+ .sock_priv_size = sizeof(struct psample_nl_sock_priv), >+ .sock_priv_init = psample_nl_sock_priv_init, >+ .sock_priv_destroy = psample_nl_sock_priv_destroy, > }; > > static void psample_group_notify(struct psample_group *group, >@@ -360,6 +427,32 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) > } > #endif > >+static inline void psample_nl_obj_desc_init(struct psample_obj_desc *desc, >+ u32 group_num) >+{ >+ memset(desc, 0, sizeof(*desc)); >+ desc->group_num = group_num; >+} >+ >+static int psample_nl_sample_filter(struct sock *dsk, struct sk_buff *skb, >+ void *data) >+{ >+ struct psample_obj_desc *desc = data; >+ struct psample_nl_sock_priv *sk_priv; >+ struct psample_obj_desc *filter; >+ int ret = 0; >+ >+ rcu_read_lock(); >+ sk_priv = __genl_sk_priv_get(&psample_nl_family, dsk); >+ if (!IS_ERR_OR_NULL(sk_priv)) { >+ filter = rcu_dereference(sk_priv->filter); >+ if (filter && desc) >+ ret = (filter->group_num != desc->group_num); >+ } >+ rcu_read_unlock(); >+ return ret; >+} >+ > void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, > u32 sample_rate, const struct psample_metadata *md) > { >@@ -370,6 +463,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, > #ifdef CONFIG_INET > struct ip_tunnel_info *tun_info; > #endif >+ struct psample_obj_desc desc; > struct sk_buff *nl_skb; > int data_len; > int meta_len; >@@ -487,8 +581,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, > #endif > > 
genlmsg_end(nl_skb, data); >- genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, >- PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); >+ psample_nl_obj_desc_init(&desc, group->group_num); >+ genlmsg_multicast_netns_filtered(&psample_nl_family, >+ group->net, nl_skb, 0, >+ PSAMPLE_NL_MCGRP_SAMPLE, >+ GFP_ATOMIC, psample_nl_sample_filter, >+ &desc); > > return; > error: >-- >2.44.0 > >
On 4/24/24 16:54, Jiri Pirko wrote: > Wed, Apr 24, 2024 at 03:50:49PM CEST, amorenoz@redhat.com wrote: >> Packet samples can come from several places (e.g: different tc sample >> actions), typically using the sample group (PSAMPLE_ATTR_SAMPLE_GROUP) >> to differentiate them. >> >> Likewise, sample consumers that listen on the multicast group may only >> be interested on a single group. However, they are currently forced to >> receive all samples and discard the ones that are not relevant, causing >> unnecessary overhead. >> >> Allow users to filter on the desired group_id by adding a new command >> PSAMPLE_SET_FILTER that can be used to pass the desired group id. >> Store this filter on the per-socket private pointer and use it for >> filtering multicasted samples. >> >> Signed-off-by: Adrian Moreno <amorenoz@redhat.com> >> --- >> include/uapi/linux/psample.h | 1 + >> net/psample/psample.c | 110 +++++++++++++++++++++++++++++++++-- >> 2 files changed, 105 insertions(+), 6 deletions(-) >> >> diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h >> index e585db5bf2d2..9d62983af0a4 100644 >> --- a/include/uapi/linux/psample.h >> +++ b/include/uapi/linux/psample.h >> @@ -28,6 +28,7 @@ enum psample_command { >> PSAMPLE_CMD_GET_GROUP, >> PSAMPLE_CMD_NEW_GROUP, >> PSAMPLE_CMD_DEL_GROUP, >> + PSAMPLE_CMD_SET_FILTER, >> }; >> >> enum psample_tunnel_key_attr { >> diff --git a/net/psample/psample.c b/net/psample/psample.c >> index a5d9b8446f77..f5f77515b969 100644 >> --- a/net/psample/psample.c >> +++ b/net/psample/psample.c >> @@ -98,13 +98,77 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, >> return msg->len; >> } >> >> -static const struct genl_small_ops psample_nl_ops[] = { >> +struct psample_obj_desc { >> + struct rcu_head rcu; >> + u32 group_num; >> +}; >> + >> +struct psample_nl_sock_priv { >> + struct psample_obj_desc __rcu *filter; >> + spinlock_t filter_lock; /* Protects filter. 
*/ >> +}; >> + >> +static void psample_nl_sock_priv_init(void *priv) >> +{ >> + struct psample_nl_sock_priv *sk_priv = priv; >> + >> + spin_lock_init(&sk_priv->filter_lock); >> +} >> + >> +static void psample_nl_sock_priv_destroy(void *priv) >> +{ >> + struct psample_nl_sock_priv *sk_priv = priv; >> + struct psample_obj_desc *filter; >> + >> + filter = rcu_dereference_protected(sk_priv->filter, true); >> + kfree_rcu(filter, rcu); >> +} >> + >> +static int psample_nl_set_filter_doit(struct sk_buff *skb, >> + struct genl_info *info) >> +{ >> + struct psample_obj_desc *filter = NULL; >> + struct psample_nl_sock_priv *sk_priv; >> + struct nlattr **attrs = info->attrs; >> + >> + if (attrs[PSAMPLE_ATTR_SAMPLE_GROUP]) { >> + filter = kzalloc(sizeof(*filter), GFP_KERNEL); >> + filter->group_num = >> + nla_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]); >> + } >> + >> + sk_priv = genl_sk_priv_get(&psample_nl_family, NETLINK_CB(skb).sk); >> + if (IS_ERR(sk_priv)) { >> + kfree(filter); >> + return PTR_ERR(sk_priv); >> + } >> + >> + spin_lock(&sk_priv->filter_lock); >> + filter = rcu_replace_pointer(sk_priv->filter, filter, >> + lockdep_is_held(&sk_priv->filter_lock)); >> + spin_unlock(&sk_priv->filter_lock); >> + kfree_rcu(filter, rcu); >> + return 0; >> +} >> + >> +static const struct nla_policy >> +psample_set_filter_policy[PSAMPLE_ATTR_SAMPLE_GROUP + 1] = { >> + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NLA_U32, }, >> +}; >> + >> +static const struct genl_ops psample_nl_ops[] = { >> { >> .cmd = PSAMPLE_CMD_GET_GROUP, >> .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, >> .dumpit = psample_nl_cmd_get_group_dumpit, >> /* can be retrieved by unprivileged users */ >> - } >> + }, >> + { >> + .cmd = PSAMPLE_CMD_SET_FILTER, >> + .doit = psample_nl_set_filter_doit, >> + .policy = psample_set_filter_policy, >> + .flags = 0, >> + }, > > Sidenote: > Did you think about converting psample to split ops and to introcude > ynl spec file for it? 
> If split ops are preferred then sure, I can do that. Thanks. >> }; >> >> static struct genl_family psample_nl_family __ro_after_init = { >> @@ -114,10 +178,13 @@ static struct genl_family psample_nl_family __ro_after_init = { >> .netnsok = true, >> .module = THIS_MODULE, >> .mcgrps = psample_nl_mcgrps, >> - .small_ops = psample_nl_ops, >> - .n_small_ops = ARRAY_SIZE(psample_nl_ops), >> + .ops = psample_nl_ops, >> + .n_ops = ARRAY_SIZE(psample_nl_ops), >> .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1, >> .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), >> + .sock_priv_size = sizeof(struct psample_nl_sock_priv), >> + .sock_priv_init = psample_nl_sock_priv_init, >> + .sock_priv_destroy = psample_nl_sock_priv_destroy, >> }; >> >> static void psample_group_notify(struct psample_group *group, >> @@ -360,6 +427,32 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) >> } >> #endif >> >> +static inline void psample_nl_obj_desc_init(struct psample_obj_desc *desc, >> + u32 group_num) >> +{ >> + memset(desc, 0, sizeof(*desc)); >> + desc->group_num = group_num; >> +} >> + >> +static int psample_nl_sample_filter(struct sock *dsk, struct sk_buff *skb, >> + void *data) >> +{ >> + struct psample_obj_desc *desc = data; >> + struct psample_nl_sock_priv *sk_priv; >> + struct psample_obj_desc *filter; >> + int ret = 0; >> + >> + rcu_read_lock(); >> + sk_priv = __genl_sk_priv_get(&psample_nl_family, dsk); >> + if (!IS_ERR_OR_NULL(sk_priv)) { >> + filter = rcu_dereference(sk_priv->filter); >> + if (filter && desc) >> + ret = (filter->group_num != desc->group_num); >> + } >> + rcu_read_unlock(); >> + return ret; >> +} >> + >> void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, >> u32 sample_rate, const struct psample_metadata *md) >> { >> @@ -370,6 +463,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, >> #ifdef CONFIG_INET >> struct ip_tunnel_info *tun_info; >> #endif >> + struct psample_obj_desc desc; >> struct 
sk_buff *nl_skb; >> int data_len; >> int meta_len; >> @@ -487,8 +581,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, >> #endif >> >> genlmsg_end(nl_skb, data); >> - genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, >> - PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); >> + psample_nl_obj_desc_init(&desc, group->group_num); >> + genlmsg_multicast_netns_filtered(&psample_nl_family, >> + group->net, nl_skb, 0, >> + PSAMPLE_NL_MCGRP_SAMPLE, >> + GFP_ATOMIC, psample_nl_sample_filter, >> + &desc); >> >> return; >> error: >> -- >> 2.44.0 >> >> >
diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..9d62983af0a4 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -28,6 +28,7 @@ enum psample_command { PSAMPLE_CMD_GET_GROUP, PSAMPLE_CMD_NEW_GROUP, PSAMPLE_CMD_DEL_GROUP, + PSAMPLE_CMD_SET_FILTER, }; enum psample_tunnel_key_attr { diff --git a/net/psample/psample.c b/net/psample/psample.c index a5d9b8446f77..f5f77515b969 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -98,13 +98,77 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, return msg->len; } -static const struct genl_small_ops psample_nl_ops[] = { +struct psample_obj_desc { + struct rcu_head rcu; + u32 group_num; +}; + +struct psample_nl_sock_priv { + struct psample_obj_desc __rcu *filter; + spinlock_t filter_lock; /* Protects filter. */ +}; + +static void psample_nl_sock_priv_init(void *priv) +{ + struct psample_nl_sock_priv *sk_priv = priv; + + spin_lock_init(&sk_priv->filter_lock); +} + +static void psample_nl_sock_priv_destroy(void *priv) +{ + struct psample_nl_sock_priv *sk_priv = priv; + struct psample_obj_desc *filter; + + filter = rcu_dereference_protected(sk_priv->filter, true); + kfree_rcu(filter, rcu); +} + +static int psample_nl_set_filter_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct psample_obj_desc *filter = NULL; + struct psample_nl_sock_priv *sk_priv; + struct nlattr **attrs = info->attrs; + + if (attrs[PSAMPLE_ATTR_SAMPLE_GROUP]) { + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + filter->group_num = + nla_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]); + } + + sk_priv = genl_sk_priv_get(&psample_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) { + kfree(filter); + return PTR_ERR(sk_priv); + } + + spin_lock(&sk_priv->filter_lock); + filter = rcu_replace_pointer(sk_priv->filter, filter, + lockdep_is_held(&sk_priv->filter_lock)); + spin_unlock(&sk_priv->filter_lock); + kfree_rcu(filter, rcu); + return 0; +} + +static 
const struct nla_policy +psample_set_filter_policy[PSAMPLE_ATTR_SAMPLE_GROUP + 1] = { + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NLA_U32, }, +}; + +static const struct genl_ops psample_nl_ops[] = { { .cmd = PSAMPLE_CMD_GET_GROUP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = psample_nl_cmd_get_group_dumpit, /* can be retrieved by unprivileged users */ - } + }, + { + .cmd = PSAMPLE_CMD_SET_FILTER, + .doit = psample_nl_set_filter_doit, + .policy = psample_set_filter_policy, + .flags = 0, + }, }; static struct genl_family psample_nl_family __ro_after_init = { @@ -114,10 +178,13 @@ static struct genl_family psample_nl_family __ro_after_init = { .netnsok = true, .module = THIS_MODULE, .mcgrps = psample_nl_mcgrps, - .small_ops = psample_nl_ops, - .n_small_ops = ARRAY_SIZE(psample_nl_ops), + .ops = psample_nl_ops, + .n_ops = ARRAY_SIZE(psample_nl_ops), .resv_start_op = PSAMPLE_CMD_GET_GROUP + 1, .n_mcgrps = ARRAY_SIZE(psample_nl_mcgrps), + .sock_priv_size = sizeof(struct psample_nl_sock_priv), + .sock_priv_init = psample_nl_sock_priv_init, + .sock_priv_destroy = psample_nl_sock_priv_destroy, }; static void psample_group_notify(struct psample_group *group, @@ -360,6 +427,32 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) } #endif +static inline void psample_nl_obj_desc_init(struct psample_obj_desc *desc, + u32 group_num) +{ + memset(desc, 0, sizeof(*desc)); + desc->group_num = group_num; +} + +static int psample_nl_sample_filter(struct sock *dsk, struct sk_buff *skb, + void *data) +{ + struct psample_obj_desc *desc = data; + struct psample_nl_sock_priv *sk_priv; + struct psample_obj_desc *filter; + int ret = 0; + + rcu_read_lock(); + sk_priv = __genl_sk_priv_get(&psample_nl_family, dsk); + if (!IS_ERR_OR_NULL(sk_priv)) { + filter = rcu_dereference(sk_priv->filter); + if (filter && desc) + ret = (filter->group_num != desc->group_num); + } + rcu_read_unlock(); + return ret; +} + void psample_sample_packet(struct 
psample_group *group, struct sk_buff *skb, u32 sample_rate, const struct psample_metadata *md) { @@ -370,6 +463,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, #ifdef CONFIG_INET struct ip_tunnel_info *tun_info; #endif + struct psample_obj_desc desc; struct sk_buff *nl_skb; int data_len; int meta_len; @@ -487,8 +581,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, #endif genlmsg_end(nl_skb, data); - genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, - PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); + psample_nl_obj_desc_init(&desc, group->group_num); + genlmsg_multicast_netns_filtered(&psample_nl_family, + group->net, nl_skb, 0, + PSAMPLE_NL_MCGRP_SAMPLE, + GFP_ATOMIC, psample_nl_sample_filter, + &desc); return; error:
Packet samples can come from several places (e.g., different tc sample actions), typically using the sample group (PSAMPLE_ATTR_SAMPLE_GROUP) to differentiate them. Likewise, sample consumers that listen on the multicast group may only be interested in a single group. However, they are currently forced to receive all samples and discard the ones that are not relevant, causing unnecessary overhead. Allow users to filter on the desired group_id by adding a new command PSAMPLE_CMD_SET_FILTER that can be used to pass the desired group id. Store this filter in the per-socket private pointer and use it for filtering multicast samples. Signed-off-by: Adrian Moreno <amorenoz@redhat.com> --- include/uapi/linux/psample.h | 1 + net/psample/psample.c | 110 +++++++++++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 6 deletions(-)