Message ID | ac24d9b6-bfff-4700-a301-d4bd0dbb9313@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net] ipmr: support IP_PKTINFO on cache report IGMP msg | expand |
On Wed, Dec 13, 2023 at 3:35 PM Leone Fernando <leone4fernando@gmail.com> wrote: > > In order to support IP_PKTINFO on those packets, we need to call > ipv4_pktinfo_prepare, so introduced minor changes to this > function to support this flow. > > When sending mrouted/pimd daemons a cache report IGMP msg, it is > unnecessary to set dst on the newly created skb. > It used to be necessary on older versions until > commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") which > changed the way IP_PKTINFO struct is been retrieved. > Given this is a 12 years old bug, I would rather target net-next tree. > Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") > Signed-off-by: Leone Fernando <leone4fernando@gmail.com> > --- > include/net/ip.h | 10 +++++++++- > net/ipv4/ip_sockglue.c | 25 ++++++++++++++++--------- > net/ipv4/ipmr.c | 12 +++++------- > net/ipv4/raw.c | 2 +- > net/ipv4/udp.c | 2 +- > 5 files changed, 32 insertions(+), 19 deletions(-) > > diff --git a/include/net/ip.h b/include/net/ip.h > index b31be912489a..1b40b7386c56 100644 > --- a/include/net/ip.h > +++ b/include/net/ip.h > @@ -767,7 +767,15 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); > * Functions provided by ip_sockglue.c > */ > > -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); > +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb, > + struct sk_buff *oskb); > + > + > +static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb) > +{ > + ipv4_pktinfo_prepare(sk, skb, NULL); > +} > + > void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, > struct sk_buff *skb, int tlen, int offset); > int ip_cmsg_send(struct sock *sk, struct msghdr *msg, > diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c > index d7d13940774e..fb26963e3869 100644 > --- a/net/ipv4/ip_sockglue.c > +++ b/net/ipv4/ip_sockglue.c > @@ -1364,19 +1364,26 @@ int do_ip_setsockopt(struct sock *sk, int level, int 
optname, > /** > * ipv4_pktinfo_prepare - transfer some info from rtable to skb > * @sk: socket > - * @skb: buffer > + * @iskb: input buffer > + * @oskb: out buffer > * > * To support IP_CMSG_PKTINFO option, we store rt_iif and specific > * destination in skb->cb[] before dst drop. > * This way, receiver doesn't make cache line misses to read rtable. > */ > -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) > +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb, > + struct sk_buff *oskb) This looks more complicated than needed. I am pretty sure we can fix the bug without touching this function... > { > - struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); > + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb); > bool prepare = inet_test_bit(PKTINFO, sk) || > ipv6_sk_rxinfo(sk); > > - if (prepare && skb_rtable(skb)) { > + if (oskb) { > + memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb)); > + pktinfo = PKTINFO_SKB_CB(oskb); > + } > + > + if (prepare && skb_rtable(iskb)) { > /* skb->cb is overloaded: prior to this point it is IP{6}CB > * which has interface index (iif) as the first member of the > * underlying inet{6}_skb_parm struct. This code then overlays > @@ -1386,20 +1393,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) > * (e.g., process binds socket to eth0 for Tx which is > * redirected to loopback in the rtable/dst). 
> */ > - struct rtable *rt = skb_rtable(skb); > - bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); > + struct rtable *rt = skb_rtable(iskb); > + bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags); > > if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) > - pktinfo->ipi_ifindex = inet_iif(skb); > + pktinfo->ipi_ifindex = inet_iif(iskb); > else if (l3slave && rt && rt->rt_iif) > pktinfo->ipi_ifindex = rt->rt_iif; > > - pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); > + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb); > } else { > pktinfo->ipi_ifindex = 0; > pktinfo->ipi_spec_dst.s_addr = 0; > } > - skb_dst_drop(skb); > + skb_dst_drop(iskb); > } > > int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, > diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c > index 9e222a57bc2b..6ed7c88743f9 100644 > --- a/net/ipv4/ipmr.c > +++ b/net/ipv4/ipmr.c > @@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt, > struct sk_buff *skb; > int ret; > > + mroute_sk = rcu_dereference(mrt->mroute_sk); > + if (!mroute_sk) > + return -EINVAL; > + > if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) > skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); > else > @@ -1069,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, > msg = (struct igmpmsg *)skb_network_header(skb); > msg->im_vif = vifi; > msg->im_vif_hi = vifi >> 8; > - skb_dst_set(skb, dst_clone(skb_dst(pkt))); > + ipv4_pktinfo_prepare(mroute_sk, pkt, skb); All we need is to call ipv4_pktinfo_prepare(sk, pkt); then copy pkt->cb to skb->cb ? 
> /* Add our header */ > igmp = skb_put(skb, sizeof(struct igmphdr)); > igmp->type = assert; > @@ -1079,12 +1083,6 @@ static int ipmr_cache_report(const struct mr_table *mrt, > skb->transport_header = skb->network_header; > } > > - mroute_sk = rcu_dereference(mrt->mroute_sk); > - if (!mroute_sk) { > - kfree_skb(skb); > - return -EINVAL; > - } > - > igmpmsg_netlink_event(mrt, skb); > > /* Deliver to mrouted */ > diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c > index 27da9d7294c0..cde60c8deed4 100644 > --- a/net/ipv4/raw.c > +++ b/net/ipv4/raw.c > @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) > > /* Charge it to the socket. */ > > - ipv4_pktinfo_prepare(sk, skb); > + ipv4_pktinfo_input_prepare(sk, skb); > if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { > kfree_skb_reason(skb, reason); > return NET_RX_DROP; > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index 89e5a806b82e..3e5a418c96c3 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) > > udp_csum_pull_header(skb); > > - ipv4_pktinfo_prepare(sk, skb); > + ipv4_pktinfo_input_prepare(sk, skb); > return __udp_queue_rcv_skb(sk, skb); > > csum_error: > -- > 2.34.1 >
Thank you Eric. I will submit a v2.
diff --git a/include/net/ip.h b/include/net/ip.h index b31be912489a..1b40b7386c56 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -767,7 +767,15 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb, + struct sk_buff *oskb); + + +static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb) +{ + ipv4_pktinfo_prepare(sk, skb, NULL); +} + void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d7d13940774e..fb26963e3869 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1364,19 +1364,26 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, /** * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket - * @skb: buffer + * @iskb: input buffer + * @oskb: out buffer * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb, + struct sk_buff *oskb) { - struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb); bool prepare = inet_test_bit(PKTINFO, sk) || ipv6_sk_rxinfo(sk); - if (prepare && skb_rtable(skb)) { + if (oskb) { + memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb)); + pktinfo = PKTINFO_SKB_CB(oskb); + } + + if (prepare && skb_rtable(iskb)) { /* skb->cb is overloaded: prior to this point it is IP{6}CB * which has interface index (iif) as the first member of the * underlying inet{6}_skb_parm struct. 
This code then overlays @@ -1386,20 +1393,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * (e.g., process binds socket to eth0 for Tx which is * redirected to loopback in the rtable/dst). */ - struct rtable *rt = skb_rtable(skb); - bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); + struct rtable *rt = skb_rtable(iskb); + bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags); if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) - pktinfo->ipi_ifindex = inet_iif(skb); + pktinfo->ipi_ifindex = inet_iif(iskb); else if (l3slave && rt && rt->rt_iif) pktinfo->ipi_ifindex = rt->rt_iif; - pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); + pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb); } else { pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + skb_dst_drop(iskb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9e222a57bc2b..6ed7c88743f9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *skb; int ret; + mroute_sk = rcu_dereference(mrt->mroute_sk); + if (!mroute_sk) + return -EINVAL; + if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) skb = skb_realloc_headroom(pkt, sizeof(struct iphdr)); else @@ -1069,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - skb_dst_set(skb, dst_clone(skb_dst(pkt))); + ipv4_pktinfo_prepare(mroute_sk, pkt, skb); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); igmp->type = assert; @@ -1079,12 +1083,6 @@ static int ipmr_cache_report(const struct mr_table *mrt, skb->transport_header = skb->network_header; } - mroute_sk = rcu_dereference(mrt->mroute_sk); - if (!mroute_sk) { - kfree_skb(skb); - return -EINVAL; - } - igmpmsg_netlink_event(mrt, skb); /* 
Deliver to mrouted */ diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 27da9d7294c0..cde60c8deed4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_input_prepare(sk, skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 89e5a806b82e..3e5a418c96c3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_input_prepare(sk, skb); return __udp_queue_rcv_skb(sk, skb); csum_error:
In order to support IP_PKTINFO on cache report IGMP packets, we need to call ipv4_pktinfo_prepare(), so this patch introduces minor changes to that function to support this flow. When sending a cache report IGMP msg to the mrouted/pimd daemons, it is unnecessary to set dst on the newly created skb. It used to be necessary in older versions, until commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") changed the way the IP_PKTINFO struct is retrieved. Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") Signed-off-by: Leone Fernando <leone4fernando@gmail.com> --- include/net/ip.h | 10 +++++++++- net/ipv4/ip_sockglue.c | 25 ++++++++++++++++--------- net/ipv4/ipmr.c | 12 +++++------- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 32 insertions(+), 19 deletions(-)