Message ID | 223614cb8fbe91c6050794762becdd9a3c3b689a.1709217658.git.petrm@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Support for nexthop group statistics | expand |
On 2/29/24 11:16 AM, Petr Machata wrote: > From: Ido Schimmel <idosch@nvidia.com> > > Add netlink support for reading NH group stats. > > This data is only for statistics of the traffic in the SW datapath. HW > nexthop group statistics will be added in the following patches. > > Emission of the stats is keyed to a new op_stats flag to avoid cluttering > the netlink message with stats if the user doesn't need them: > NHA_OP_FLAG_DUMP_STATS. > > Co-developed-by: Petr Machata <petrm@nvidia.com> > Signed-off-by: Petr Machata <petrm@nvidia.com> > Signed-off-by: Ido Schimmel <idosch@nvidia.com> > --- > > Notes: > v2: > - Use uint to encode NHA_GROUP_STATS_ENTRY_PACKETS > - Rename jump target in nla_put_nh_group_stats() to avoid > having to rename further in the patchset. > > include/uapi/linux/nexthop.h | 30 ++++++++++++ > net/ipv4/nexthop.c | 92 ++++++++++++++++++++++++++++++++---- > 2 files changed, 114 insertions(+), 8 deletions(-) > > @@ -104,4 +109,29 @@ enum { > > #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) > > +enum { > + NHA_GROUP_STATS_UNSPEC, > + > + /* nested; nexthop group entry stats */ > + NHA_GROUP_STATS_ENTRY, > + > + __NHA_GROUP_STATS_MAX, > +}; > + > +#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1) > + > +enum { > + NHA_GROUP_STATS_ENTRY_UNSPEC, > + > + /* u32; nexthop id of the nexthop group entry */ > + NHA_GROUP_STATS_ENTRY_ID, > + > + /* uint; number of packets forwarded via the nexthop group entry */ why not make it a u64? > + NHA_GROUP_STATS_ENTRY_PACKETS, > + > + __NHA_GROUP_STATS_ENTRY_MAX, > +}; > + > +#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1) > + > #endif Reviewed-by: David Ahern <dsahern@kernel.org>
David Ahern <dsahern@kernel.org> writes: > On 2/29/24 11:16 AM, Petr Machata wrote: >> +enum { >> + NHA_GROUP_STATS_ENTRY_UNSPEC, >> + >> + /* u32; nexthop id of the nexthop group entry */ >> + NHA_GROUP_STATS_ENTRY_ID, >> + >> + /* uint; number of packets forwarded via the nexthop group entry */ > > why not make it a u64? uint will store it as 32-bit if possible and as 64-bit if necessary.
On Fri, 1 Mar 2024 08:45:52 -0700 David Ahern wrote: > > + /* uint; number of packets forwarded via the nexthop group entry */ > > why not make it a u64? I think it should be our default type. Explicit length should only be picked if there's a clear reason. Too many review cycles wasted steering people onto correct types... Now, I'm not sure Pablo merged the sint/uint support into libmnl, I can share the patches to handle them in iproute2, if that helps. Or just copy/paste from YNL. I need to find the courage to ask you how we can start using YNL in iproute2, I really worry that we're not getting CLI tools written for new families any more :(
On Thu, Feb 29, 2024 at 7:20 PM Petr Machata <petrm@nvidia.com> wrote: > > From: Ido Schimmel <idosch@nvidia.com> > > Add netlink support for reading NH group stats. > > This data is only for statistics of the traffic in the SW datapath. HW > nexthop group statistics will be added in the following patches. > > Emission of the stats is keyed to a new op_stats flag to avoid cluttering > the netlink message with stats if the user doesn't need them: > NHA_OP_FLAG_DUMP_STATS. > > Co-developed-by: Petr Machata <petrm@nvidia.com> > Signed-off-by: Petr Machata <petrm@nvidia.com> > Signed-off-by: Ido Schimmel <idosch@nvidia.com> > --- > > Notes: > v2: > - Use uint to encode NHA_GROUP_STATS_ENTRY_PACKETS > - Rename jump target in nla_put_nh_group_stats() to avoid > having to rename further in the patchset. > > include/uapi/linux/nexthop.h | 30 ++++++++++++ > net/ipv4/nexthop.c | 92 ++++++++++++++++++++++++++++++++---- > 2 files changed, 114 insertions(+), 8 deletions(-) > > diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h > index 086444e2946c..f4db63c17085 100644 > --- a/include/uapi/linux/nexthop.h > +++ b/include/uapi/linux/nexthop.h > @@ -30,6 +30,8 @@ enum { > > #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) > > +#define NHA_OP_FLAG_DUMP_STATS BIT(0) > + > enum { > NHA_UNSPEC, > NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ > @@ -63,6 +65,9 @@ enum { > /* u32; operation-specific flags */ > NHA_OP_FLAGS, > > + /* nested; nexthop group stats */ > + NHA_GROUP_STATS, > + > __NHA_MAX, > }; > > @@ -104,4 +109,29 @@ enum { > > #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) > > +enum { > + NHA_GROUP_STATS_UNSPEC, > + > + /* nested; nexthop group entry stats */ > + NHA_GROUP_STATS_ENTRY, > + > + __NHA_GROUP_STATS_MAX, > +}; > + > +#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1) > + > +enum { > + NHA_GROUP_STATS_ENTRY_UNSPEC, > + > + /* u32; nexthop id of the nexthop group entry */ > + NHA_GROUP_STATS_ENTRY_ID, > + > + /* uint; number of packets forwarded via the nexthop group entry */ > + NHA_GROUP_STATS_ENTRY_PACKETS, > + > + __NHA_GROUP_STATS_ENTRY_MAX, > +}; > + > +#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1) > + > #endif > diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c > index 4be66622e24f..0ede8777bd66 100644 > --- a/net/ipv4/nexthop.c > +++ b/net/ipv4/nexthop.c > @@ -41,7 +41,8 @@ static const struct nla_policy rtm_nh_policy_new[] = { > > static const struct nla_policy rtm_nh_policy_get[] = { > [NHA_ID] = { .type = NLA_U32 }, > - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), > + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, > + NHA_OP_FLAG_DUMP_STATS), > }; > > static const struct nla_policy rtm_nh_policy_del[] = { > @@ -53,7 +54,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = { > [NHA_GROUPS] = { .type = NLA_FLAG }, > [NHA_MASTER] = { .type = NLA_U32 }, > [NHA_FDB] = { .type = NLA_FLAG }, > - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), > + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, > + NHA_OP_FLAG_DUMP_STATS), > }; > > static const struct nla_policy rtm_nh_res_policy_new[] = { > @@ -661,8 +663,77 @@ static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) > return -EMSGSIZE; > } > > -static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) > +static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, > + struct nh_grp_entry_stats *stats) > { > + int i; > + > + memset(stats, 0, sizeof(*stats)); > + for_each_possible_cpu(i) { > + struct nh_grp_entry_stats *cpu_stats; > + unsigned int start; > + u64 packets; > + > + cpu_stats = per_cpu_ptr(nhge->stats, i); > + do { > + start = u64_stats_fetch_begin(&cpu_stats->syncp); > + packets = cpu_stats->packets; This is not safe, even on 64bit arches. You should use u64_stats_t, u64_stats_read(), u64_stats_add() ... > + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); > + > + stats->packets += packets; > + } > +}
On 3/1/24 10:28 AM, Jakub Kicinski wrote: > On Fri, 1 Mar 2024 08:45:52 -0700 David Ahern wrote: >>> + /* uint; number of packets forwarded via the nexthop group entry */ >> >> why not make it a u64? > > I think it should be our default type. Explicit length should only > be picked if there's a clear reason. Too many review cycles wasted > steering people onto correct types... > > Now, I'm not sure Pablo merged the sint/uint support into libmnl, > I can share the patches to handle them in iproute2, if that helps. > Or just copy/paste from YNL. > > I need to find the courage to ask you how we can start using YNL > in iproute2, I really worry that we're not getting CLI tools written > for new families any more :( I need to find time to play around with the cli tool more; my attempts last weekend were a bit rough hunting for the right syntax.
On Fri, 1 Mar 2024 19:45:27 -0700 David Ahern wrote: > > I need to find the courage to ask you how we can start using YNL > > in iproute2, I really worry that we're not getting CLI tools written > > for new families any more :( > > I need to find time to play around with the cli tool more; my attempts > last weekend were a bit rough hunting for the right syntax. Do you mean playing with the Python CLI? :( The thought of packaging that did cross my mind, but I always viewed it as a quick PoC / development tool. Maybe you're right tho, I should stop fighting it.
Eric Dumazet <edumazet@google.com> writes: > On Thu, Feb 29, 2024 at 7:20 PM Petr Machata <petrm@nvidia.com> wrote: >> @@ -661,8 +663,77 @@ static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) >> return -EMSGSIZE; >> } >> >> -static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) >> +static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, >> + struct nh_grp_entry_stats *stats) >> { >> + int i; >> + >> + memset(stats, 0, sizeof(*stats)); >> + for_each_possible_cpu(i) { >> + struct nh_grp_entry_stats *cpu_stats; >> + unsigned int start; >> + u64 packets; >> + >> + cpu_stats = per_cpu_ptr(nhge->stats, i); >> + do { >> + start = u64_stats_fetch_begin(&cpu_stats->syncp); >> + packets = cpu_stats->packets; > > This is not safe, even on 64bit arches. > > You should use u64_stats_t, u64_stats_read(), u64_stats_add() ... OK.
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 086444e2946c..f4db63c17085 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -30,6 +30,8 @@ enum { #define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1) +#define NHA_OP_FLAG_DUMP_STATS BIT(0) + enum { NHA_UNSPEC, NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */ @@ -63,6 +65,9 @@ enum { /* u32; operation-specific flags */ NHA_OP_FLAGS, + /* nested; nexthop group stats */ + NHA_GROUP_STATS, + __NHA_MAX, }; @@ -104,4 +109,29 @@ enum { #define NHA_RES_BUCKET_MAX (__NHA_RES_BUCKET_MAX - 1) +enum { + NHA_GROUP_STATS_UNSPEC, + + /* nested; nexthop group entry stats */ + NHA_GROUP_STATS_ENTRY, + + __NHA_GROUP_STATS_MAX, +}; + +#define NHA_GROUP_STATS_MAX (__NHA_GROUP_STATS_MAX - 1) + +enum { + NHA_GROUP_STATS_ENTRY_UNSPEC, + + /* u32; nexthop id of the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_ID, + + /* uint; number of packets forwarded via the nexthop group entry */ + NHA_GROUP_STATS_ENTRY_PACKETS, + + __NHA_GROUP_STATS_ENTRY_MAX, +}; + +#define NHA_GROUP_STATS_ENTRY_MAX (__NHA_GROUP_STATS_ENTRY_MAX - 1) + #endif diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4be66622e24f..0ede8777bd66 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -41,7 +41,8 @@ static const struct nla_policy rtm_nh_policy_new[] = { static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAG_DUMP_STATS), }; static const struct nla_policy rtm_nh_policy_del[] = { @@ -53,7 +54,8 @@ static const struct nla_policy rtm_nh_policy_dump[] = { [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, - [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0), + [NHA_OP_FLAGS] = NLA_POLICY_MASK(NLA_U32, + NHA_OP_FLAG_DUMP_STATS), }; static const struct nla_policy rtm_nh_res_policy_new[] = { @@ -661,8 +663,77 @@ static int nla_put_nh_group_res(struct sk_buff *skb, struct nh_group *nhg) return -EMSGSIZE; } -static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) +static void nh_grp_entry_stats_read(struct nh_grp_entry *nhge, + struct nh_grp_entry_stats *stats) { + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + struct nh_grp_entry_stats *cpu_stats; + unsigned int start; + u64 packets; + + cpu_stats = per_cpu_ptr(nhge->stats, i); + do { + start = u64_stats_fetch_begin(&cpu_stats->syncp); + packets = cpu_stats->packets; + } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + stats->packets += packets; + } +} + +static int nla_put_nh_group_stats_entry(struct sk_buff *skb, + struct nh_grp_entry *nhge) +{ + struct nh_grp_entry_stats stats; + struct nlattr *nest; + + nh_grp_entry_stats_read(nhge, &stats); + + nest = nla_nest_start(skb, NHA_GROUP_STATS_ENTRY); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, NHA_GROUP_STATS_ENTRY_ID, nhge->nh->id) || + nla_put_uint(skb, NHA_GROUP_STATS_ENTRY_PACKETS, stats.packets)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int nla_put_nh_group_stats(struct sk_buff *skb, struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + struct nlattr *nest; + int i; + + nest = nla_nest_start(skb, NHA_GROUP_STATS); + if (!nest) + return -EMSGSIZE; + + for (i = 0; i < nhg->num_nh; i++) + if (nla_put_nh_group_stats_entry(skb, &nhg->nh_entries[i])) + goto cancel_out; + + nla_nest_end(skb, nest); + return 0; + +cancel_out: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int nla_put_nh_group(struct sk_buff *skb, struct nexthop *nh, + u32 op_flags) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); struct nexthop_grp *p; size_t len = nhg->num_nh * sizeof(*p); struct nlattr *nla; @@ -691,6 +762,10 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) if (nhg->resilient && nla_put_nh_group_res(skb, nhg)) goto nla_put_failure; + if (op_flags & NHA_OP_FLAG_DUMP_STATS && + nla_put_nh_group_stats(skb, nh)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -698,7 +773,8 @@ static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) } static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, - int event, u32 portid, u32 seq, unsigned int nlflags) + int event, u32 portid, u32 seq, unsigned int nlflags, + u32 op_flags) { struct fib6_nh *fib6_nh; struct fib_nh *fib_nh; @@ -725,7 +801,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB)) goto nla_put_failure; - if (nla_put_nh_group(skb, nhg)) + if (nla_put_nh_group(skb, nh, op_flags)) goto nla_put_failure; goto out; } @@ -856,7 +932,7 @@ static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) if (!skb) goto errout; - err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); + err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags, 0); if (err < 0) { /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -3085,7 +3161,7 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_free; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + nlh->nlmsg_seq, 0, op_flags); if (err < 0) { WARN_ON(err == -EMSGSIZE); goto errout_free; @@ -3255,7 +3331,7 @@ static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); + cb->nlh->nlmsg_seq, NLM_F_MULTI, filter->op_flags); } /* rtnl */