Message ID | ed2e2e305dd49423745b62c0152a0b85bc84a767.1674752051.git.petrm@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | None | expand |
On Thu, 26 Jan 2023 18:01:14 +0100 Petr Machata <petrm@nvidia.com> wrote: > The following patch will add two more maximum MDB allowances to the global > one, mcast_hash_max, that exists today. In all these cases, attempts to add > MDB entries above the configured maximums through netlink, fail noisily and > obviously. Such visibility is missing when adding entries through the > control plane traffic, by IGMP or MLD packets. > > To improve visibility in those cases, add a trace point that reports the > violation, including the relevant netdevice (be it a slave or the bridge > itself), and the MDB entry parameters: > > # perf record -e bridge:br_mdb_full & > # [...] > # perf script | cut -d: -f4- > dev v2 af 2 src 192.0.2.1/:: grp 239.1.1.1/::/00:00:00:00:00:00 vid 0 > dev v2 af 10 src 0.0.0.0/2001:db8:1::1 grp 0.0.0.0/ff0e::1/00:00:00:00:00:00 vid 0 > dev v2 af 2 src 192.0.2.1/:: grp 239.1.1.1/::/00:00:00:00:00:00 vid 10 > dev v2 af 10 src 0.0.0.0/2001:db8:1::1 grp 0.0.0.0/ff0e::1/00:00:00:00:00:00 vid 10 > > CC: Steven Rostedt <rostedt@goodmis.org> > CC: linux-trace-kernel@vger.kernel.org > Signed-off-by: Petr Machata <petrm@nvidia.com> > Reviewed-by: Ido Schimmel <idosch@nvidia.com> > --- > include/trace/events/bridge.h | 67 +++++++++++++++++++++++++++++++++++ > net/core/net-traces.c | 1 + > 2 files changed, 68 insertions(+) > > diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h > index 6b200059c2c5..00d5e2dcb3ad 100644 > --- a/include/trace/events/bridge.h > +++ b/include/trace/events/bridge.h > @@ -122,6 +122,73 @@ TRACE_EVENT(br_fdb_update, > __entry->flags) > ); > > +TRACE_EVENT(br_mdb_full, > + > + TP_PROTO(const struct net_device *dev, > + const struct br_ip *group), > + > + TP_ARGS(dev, group), > + > + TP_STRUCT__entry( > + __string(dev, dev->name) > + __field(int, af) > + __field(u16, vid) > + __array(__u8, src4, 4) > + __array(__u8, src6, 16) > + __array(__u8, grp4, 4) > + __array(__u8, grp6, 16) > + __array(__u8, grpmac, ETH_ALEN) /* For af == 0. */ Instead of wasting ring buffer space, why not just have: __array(__u8, src, 16) __array(__u8, grp, 16) > + ), > + > + TP_fast_assign( > + __assign_str(dev, dev->name); > + __entry->vid = group->vid; > + > + if (!group->proto) { > + __entry->af = 0; > + > + memset(__entry->src4, 0, sizeof(__entry->src4)); > + memset(__entry->src6, 0, sizeof(__entry->src6)); > + memset(__entry->grp4, 0, sizeof(__entry->grp4)); > + memset(__entry->grp6, 0, sizeof(__entry->grp6)); > + memcpy(__entry->grpmac, group->dst.mac_addr, ETH_ALEN); > + } else if (group->proto == htons(ETH_P_IP)) { > + __be32 *p32; > + > + __entry->af = AF_INET; > + > + p32 = (__be32 *) __entry->src4; > + *p32 = group->src.ip4; > + > + p32 = (__be32 *) __entry->grp4; > + *p32 = group->dst.ip4; struct in6_addr *in6; in6 = (struct in6_addr *)__entry->src; ipv6_addr_set_v4mapped(group->src.ip4, in6); in6 = (struct in6_addr *)__entry->grp; ipv6_addr_set_v4mapped(group->grp.ip4, in6); > + > + memset(__entry->src6, 0, sizeof(__entry->src6)); > + memset(__entry->grp6, 0, sizeof(__entry->grp6)); > + memset(__entry->grpmac, 0, ETH_ALEN); > +#if IS_ENABLED(CONFIG_IPV6) > + } else { > + struct in6_addr *in6; > + > + __entry->af = AF_INET6; > + > + in6 = (struct in6_addr *)__entry->src6; > + *in6 = group->src.ip6; > + > + in6 = (struct in6_addr *)__entry->grp6; > + *in6 = group->dst.ip6; > + > + memset(__entry->src4, 0, sizeof(__entry->src4)); > + memset(__entry->grp4, 0, sizeof(__entry->grp4)); > + memset(__entry->grpmac, 0, ETH_ALEN); > +#endif > + } > + ), > + > + TP_printk("dev %s af %u src %pI4/%pI6c grp %pI4/%pI6c/%pM vid %u", > + __get_str(dev), __entry->af, __entry->src4, __entry->src6, > + __entry->grp4, __entry->grp6, __entry->grpmac, __entry->vid) And just have: TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u", __get_str(dev), __entry->af, __entry->src, __entry->grp, __entry->grpmac, __entry->vid) As the %pI6c should detect that it's a ipv4 address and show that. -- Steve > +); > > #endif /* _TRACE_BRIDGE_H */ > > diff --git a/net/core/net-traces.c b/net/core/net-traces.c > index ee7006bbe49b..805b7385dd8d 100644 > --- a/net/core/net-traces.c > +++ b/net/core/net-traces.c > @@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); > EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); > EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); > EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); > +EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full); > #endif > > #if IS_ENABLED(CONFIG_PAGE_POOL)
Steven Rostedt <rostedt@goodmis.org> writes: >> diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h >> index 6b200059c2c5..00d5e2dcb3ad 100644 >> --- a/include/trace/events/bridge.h >> +++ b/include/trace/events/bridge.h >> @@ -122,6 +122,73 @@ TRACE_EVENT(br_fdb_update, >> __entry->flags) >> ); >> >> +TRACE_EVENT(br_mdb_full, >> + >> + TP_PROTO(const struct net_device *dev, >> + const struct br_ip *group), >> + >> + TP_ARGS(dev, group), >> + >> + TP_STRUCT__entry( >> + __string(dev, dev->name) >> + __field(int, af) >> + __field(u16, vid) >> + __array(__u8, src4, 4) >> + __array(__u8, src6, 16) >> + __array(__u8, grp4, 4) >> + __array(__u8, grp6, 16) >> + __array(__u8, grpmac, ETH_ALEN) /* For af == 0. */ > > Instead of wasting ring buffer space, why not just have: > > __array(__u8, src, 16) > __array(__u8, grp, 16) > >> + ), >> + >> + TP_fast_assign( >> + __assign_str(dev, dev->name); >> + __entry->vid = group->vid; >> + >> + if (!group->proto) { >> + __entry->af = 0; >> + >> + memset(__entry->src4, 0, sizeof(__entry->src4)); >> + memset(__entry->src6, 0, sizeof(__entry->src6)); >> + memset(__entry->grp4, 0, sizeof(__entry->grp4)); >> + memset(__entry->grp6, 0, sizeof(__entry->grp6)); >> + memcpy(__entry->grpmac, group->dst.mac_addr, ETH_ALEN); >> + } else if (group->proto == htons(ETH_P_IP)) { >> + __be32 *p32; >> + >> + __entry->af = AF_INET; >> + >> + p32 = (__be32 *) __entry->src4; >> + *p32 = group->src.ip4; >> + >> + p32 = (__be32 *) __entry->grp4; >> + *p32 = group->dst.ip4; > > struct in6_addr *in6; > > in6 = (struct in6_addr *)__entry->src; > ipv6_addr_set_v4mapped(group->src.ip4, in6); > > in6 = (struct in6_addr *)__entry->grp; > ipv6_addr_set_v4mapped(group->grp.ip4, in6); > >> + >> + memset(__entry->src6, 0, sizeof(__entry->src6)); >> + memset(__entry->grp6, 0, sizeof(__entry->grp6)); >> + memset(__entry->grpmac, 0, ETH_ALEN); >> +#if IS_ENABLED(CONFIG_IPV6) >> + } else { >> + struct in6_addr *in6; >> + >> + __entry->af = AF_INET6; >> + >> + in6 = (struct in6_addr *)__entry->src6; >> + *in6 = group->src.ip6; >> + >> + in6 = (struct in6_addr *)__entry->grp6; >> + *in6 = group->dst.ip6; >> + >> + memset(__entry->src4, 0, sizeof(__entry->src4)); >> + memset(__entry->grp4, 0, sizeof(__entry->grp4)); >> + memset(__entry->grpmac, 0, ETH_ALEN); >> +#endif >> + } >> + ), >> + >> + TP_printk("dev %s af %u src %pI4/%pI6c grp %pI4/%pI6c/%pM vid %u", >> + __get_str(dev), __entry->af, __entry->src4, __entry->src6, >> + __entry->grp4, __entry->grp6, __entry->grpmac, __entry->vid) > > And just have: > > TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u", > __get_str(dev), __entry->af, __entry->src, __entry->grp, > __entry->grpmac, __entry->vid) > > As the %pI6c should detect that it's a ipv4 address and show that. So the reason I split the fields was that %pI4, %pI6c, %pM do not seem to work with buffers of wrong size. But I can consolidate 4/6 by changing the address to IPv6 like you propose. I'll do this for v2. Thanks!
Steven Rostedt <rostedt@goodmis.org> writes: > On Thu, 26 Jan 2023 18:01:14 +0100 > Petr Machata <petrm@nvidia.com> wrote: > >> + TP_printk("dev %s af %u src %pI4/%pI6c grp %pI4/%pI6c/%pM vid %u", >> + __get_str(dev), __entry->af, __entry->src4, __entry->src6, >> + __entry->grp4, __entry->grp6, __entry->grpmac, __entry->vid) > > And just have: > > TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u", > __get_str(dev), __entry->af, __entry->src, __entry->grp, > __entry->grpmac, __entry->vid) > > As the %pI6c should detect that it's a ipv4 address and show that. This means the IP addresses will always be IPv6, even for an IPv4 MDB entries. One can still figure out the true protocol from the address family field, but it might not be obvious. Plus the IPv4-mapped IPv6 addresses are not really formatted as IPv4, though yeah, IPv4 notation is embedded in that. All the information is still there, but... scrambled? Not sure the reduction in clarity is worth the 8 bytes that we save. The tracepoint is unlikely to trigger often. What say you?
On Mon, 30 Jan 2023 16:50:32 +0100 Petr Machata <petrm@nvidia.com> wrote: > Steven Rostedt <rostedt@goodmis.org> writes: > > > On Thu, 26 Jan 2023 18:01:14 +0100 > > Petr Machata <petrm@nvidia.com> wrote: > > > >> + TP_printk("dev %s af %u src %pI4/%pI6c grp %pI4/%pI6c/%pM vid %u", > >> + __get_str(dev), __entry->af, __entry->src4, __entry->src6, > >> + __entry->grp4, __entry->grp6, __entry->grpmac, __entry->vid) > > > > And just have: > > > > TP_printk("dev %s af %u src %pI6c grp %pI6c/%pM vid %u", > > __get_str(dev), __entry->af, __entry->src, __entry->grp, > > __entry->grpmac, __entry->vid) > > > > As the %pI6c should detect that it's a ipv4 address and show that. > > This means the IP addresses will always be IPv6, even for an IPv4 MDB > entries. One can still figure out the true protocol from the address > family field, but it might not be obvious. Plus the IPv4-mapped IPv6 > addresses are not really formatted as IPv4, though yeah, IPv4 notation > is embedded in that. > > All the information is still there, but... scrambled? Not sure the > reduction in clarity is worth the 8 bytes that we save. The tracepoint > is unlikely to trigger often. 8 bytes per event, and yes, ring buffer real estate is expensive. And if you use trace-cmd or perf, we can always add a plugin to libtraceevent that can format this much nicer based on the information that is there. -- Steve
diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 6b200059c2c5..00d5e2dcb3ad 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -122,6 +122,73 @@ TRACE_EVENT(br_fdb_update, __entry->flags) ); +TRACE_EVENT(br_mdb_full, + + TP_PROTO(const struct net_device *dev, + const struct br_ip *group), + + TP_ARGS(dev, group), + + TP_STRUCT__entry( + __string(dev, dev->name) + __field(int, af) + __field(u16, vid) + __array(__u8, src4, 4) + __array(__u8, src6, 16) + __array(__u8, grp4, 4) + __array(__u8, grp6, 16) + __array(__u8, grpmac, ETH_ALEN) /* For af == 0. */ + ), + + TP_fast_assign( + __assign_str(dev, dev->name); + __entry->vid = group->vid; + + if (!group->proto) { + __entry->af = 0; + + memset(__entry->src4, 0, sizeof(__entry->src4)); + memset(__entry->src6, 0, sizeof(__entry->src6)); + memset(__entry->grp4, 0, sizeof(__entry->grp4)); + memset(__entry->grp6, 0, sizeof(__entry->grp6)); + memcpy(__entry->grpmac, group->dst.mac_addr, ETH_ALEN); + } else if (group->proto == htons(ETH_P_IP)) { + __be32 *p32; + + __entry->af = AF_INET; + + p32 = (__be32 *) __entry->src4; + *p32 = group->src.ip4; + + p32 = (__be32 *) __entry->grp4; + *p32 = group->dst.ip4; + + memset(__entry->src6, 0, sizeof(__entry->src6)); + memset(__entry->grp6, 0, sizeof(__entry->grp6)); + memset(__entry->grpmac, 0, ETH_ALEN); +#if IS_ENABLED(CONFIG_IPV6) + } else { + struct in6_addr *in6; + + __entry->af = AF_INET6; + + in6 = (struct in6_addr *)__entry->src6; + *in6 = group->src.ip6; + + in6 = (struct in6_addr *)__entry->grp6; + *in6 = group->dst.ip6; + + memset(__entry->src4, 0, sizeof(__entry->src4)); + memset(__entry->grp4, 0, sizeof(__entry->grp4)); + memset(__entry->grpmac, 0, ETH_ALEN); +#endif + } + ), + + TP_printk("dev %s af %u src %pI4/%pI6c grp %pI4/%pI6c/%pM vid %u", + __get_str(dev), __entry->af, __entry->src4, __entry->src6, + __entry->grp4, __entry->grp6, __entry->grpmac, __entry->vid) +); #endif /* _TRACE_BRIDGE_H */ diff --git a/net/core/net-traces.c b/net/core/net-traces.c index ee7006bbe49b..805b7385dd8d 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full); #endif #if IS_ENABLED(CONFIG_PAGE_POOL)