Message ID | 36dc1203db9169f553797a6e2d2a46265f19dd8b.1692172297.git.antony.antony@secunet.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | xfrm: Support GRO decapsulation for ESP in UDP encapsulation | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
Hi Antony, On Wed, Aug 16, 2023 at 12:57 PM Antony Antony <antony.antony@secunet.com> wrote: > > From: Steffen Klassert <steffen.klassert@secunet.com> > > This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated > packets. Decapsulation happens at L2 and saves a full round through > the stack for each packet. This is also needed to support HW offload > for ESP in UDP encapsulation. > > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> > Co-developed-by: Antony Antony <antony.antony@secunet.com> > Signed-off-by: Antony Antony <antony.antony@secunet.com> > --- > include/net/gro.h | 2 +- > include/net/xfrm.h | 4 ++ > net/ipv4/esp4_offload.c | 6 ++- > net/ipv4/udp.c | 16 ++++++- > net/ipv4/xfrm4_input.c | 98 ++++++++++++++++++++++++++++++++--------- > 5 files changed, 103 insertions(+), 23 deletions(-) > > diff --git a/include/net/gro.h b/include/net/gro.h > index a4fab706240d..41c12c5d1ea1 100644 > --- a/include/net/gro.h > +++ b/include/net/gro.h > @@ -29,7 +29,7 @@ struct napi_gro_cb { > /* Number of segments aggregated. */ > u16 count; > > - /* Used in ipv6_gro_receive() and foo-over-udp */ > + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ > u16 proto; > > /* jiffies when first packet was created/queued */ > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > index 33ee3f5936e6..e980f442ddcd 100644 > --- a/include/net/xfrm.h > +++ b/include/net/xfrm.h > @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu); > int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); > int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, > int encap_type); > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > + struct sk_buff *skb); Why does this function need to be declared twice in this file? 
> int xfrm4_transport_finish(struct sk_buff *skb, int async); > int xfrm4_rcv(struct sk_buff *skb); > > @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); > void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); > int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); > int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > + struct sk_buff *skb); > int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, > int optlen); > #else > diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c > index 77bb01032667..34ebfdf0e986 100644 > --- a/net/ipv4/esp4_offload.c > +++ b/net/ipv4/esp4_offload.c > @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, > int offset = skb_gro_offset(skb); > struct xfrm_offload *xo; > struct xfrm_state *x; > + int encap_type = 0; > __be32 seq; > __be32 spi; > > @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, > > xo->flags |= XFRM_GRO; > > + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) > + encap_type = UDP_ENCAP_ESPINUDP; > + > XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; > XFRM_SPI_SKB_CB(skb)->family = AF_INET; > XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); > @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, > > /* We don't need to handle errors from xfrm_input, it does all > * the error handling and frees the resources on error. 
*/ > - xfrm_input(skb, IPPROTO_ESP, spi, 0); > + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); > > return ERR_PTR(-EINPROGRESS); > out_reset: > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index aa32afd871ee..337607b17ebd 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk) > } > } > > +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, > + struct udp_sock *up) > +{ > +#ifdef CONFIG_XFRM > + if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) { > + if (family == AF_INET) > + up->gro_receive = xfrm4_gro_udp_encap_rcv; > + } > +#endif > +} > + > /* > * Socket option code for UDP > */ > @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, > case 0: > #ifdef CONFIG_XFRM > case UDP_ENCAP_ESPINUDP: > + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up); > + fallthrough; > case UDP_ENCAP_ESPINUDP_NON_IKE: > #if IS_ENABLED(CONFIG_IPV6) > if (sk->sk_family == AF_INET6) > up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; > - else > #endif > + if (sk->sk_family == AF_INET) Why is this change needed? 
> up->encap_rcv = xfrm4_udp_encap_rcv; > #endif > fallthrough; > @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, > udp_tunnel_encap_enable(sk->sk_socket); > up->gro_enabled = valbool; > up->accept_udp_l4 = valbool; > + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up); > release_sock(sk); > break; > > diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c > index ad2afeef4f10..b57f477c745e 100644 > --- a/net/ipv4/xfrm4_input.c > +++ b/net/ipv4/xfrm4_input.c > @@ -17,6 +17,8 @@ > #include <linux/netfilter_ipv4.h> > #include <net/ip.h> > #include <net/xfrm.h> > +#include <net/protocol.h> > +#include <net/gro.h> > > static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, > struct sk_buff *skb) > @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) > return 0; > } > > -/* If it's a keepalive packet, then just eat it. > - * If it's an encapsulated packet, then pass it to the > - * IPsec xfrm input. > - * Returns 0 if skb passed to xfrm or was dropped. > - * Returns >0 if skb should be passed to UDP. > - * Returns <0 if skb should be resubmitted (-ret is protocol) > - */ > -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) > { > struct udp_sock *up = udp_sk(sk); > struct udphdr *uh; > @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > __be32 *udpdata32; > __u16 encap_type = up->encap_type; > > - /* if this is not encapsulated socket, then just return now */ > - if (!encap_type) > + /* if unknown encap_type then just return now */ > + if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE) This change is unclear to me - the patch adds support for GRO on UDP_ENCAP_ESPINUDP. How can we now get other encap types here? and why wasn't the old condition ok? 
> return 1; > > /* If this is a paged skb, make sure we pull up > @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > case UDP_ENCAP_ESPINUDP: > /* Check if this is a keepalive packet. If so, eat it. */ > if (len == 1 && udpdata[0] == 0xff) { > - goto drop; > + return -EINVAL; > } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { > /* ESP Packet without Non-ESP header */ > len = sizeof(struct udphdr); > @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > case UDP_ENCAP_ESPINUDP_NON_IKE: > /* Check if this is a keepalive packet. If so, eat it. */ > if (len == 1 && udpdata[0] == 0xff) { > - goto drop; > + return -EINVAL; > } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && > udpdata32[0] == 0 && udpdata32[1] == 0) { > > @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > * protocol to ESP, and then call into the transform receiver. > */ > if (skb_unclone(skb, GFP_ATOMIC)) > - goto drop; > + return -EINVAL; > > /* Now we can update and verify the packet length... */ > iph = ip_hdr(skb); > @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > iph->tot_len = htons(ntohs(iph->tot_len) - len); > if (skb->len < iphlen + len) { > /* packet is too small!?! */ > - goto drop; > + return -EINVAL; > } > > /* pull the data buffer up to the ESP header and set the > * transport header to point to ESP. Keep UDP on the stack > * for later. > */ > - __skb_pull(skb, len); > - skb_reset_transport_header(skb); > + if (pull) { > + __skb_pull(skb, len); > + skb_reset_transport_header(skb); > + } else { > + skb_set_transport_header(skb, len); > + } > > /* process ESP */ > - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); > - > -drop: > - kfree_skb(skb); > return 0; > } > > +/* If it's a keepalive packet, then just eat it. > + * If it's an encapsulated packet, then pass it to the > + * IPsec xfrm input. 
> + * Returns 0 if skb passed to xfrm or was dropped. > + * Returns >0 if skb should be passed to UDP. > + * Returns <0 if skb should be resubmitted (-ret is protocol) > + */ > +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > +{ > + int ret; > + > + ret = __xfrm4_udp_encap_rcv(sk, skb, true); > + if (!ret) > + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, > + udp_sk(sk)->encap_type); > + > + if (ret < 0) { > + kfree_skb(skb); > + return 0; > + } > + > + return ret; > +} > + > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > + struct sk_buff *skb) > +{ > + int offset = skb_gro_offset(skb); > + const struct net_offload *ops; > + struct sk_buff *pp = NULL; > + int ret; > + > + offset = offset - sizeof(struct udphdr); > + > + if (!pskb_pull(skb, offset)) > + return NULL; > + > + rcu_read_lock(); > + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); > + if (!ops || !ops->callbacks.gro_receive) > + goto out; > + > + ret = __xfrm4_udp_encap_rcv(sk, skb, false); > + if (ret) > + goto out; > + > + skb_push(skb, offset); > + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; > + > + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); > + rcu_read_unlock(); > + > + return pp; > + > +out: > + rcu_read_unlock(); > + skb_push(skb, offset); > + NAPI_GRO_CB(skb)->same_flow = 0; > + NAPI_GRO_CB(skb)->flush = 1; > + > + return NULL; > +} > + > int xfrm4_rcv(struct sk_buff *skb) > { > return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); > -- > 2.30.2 >
Hi Eyal, Thanks for your quick review. I have addressed the points you raised for both v4 and sent v5 patches. On Wed, Aug 16, 2023 at 14:15:01 +0300, Eyal Birger wrote: > Hi Antony, > > On Wed, Aug 16, 2023 at 12:57 PM Antony Antony > <antony.antony@secunet.com> wrote: > > > > From: Steffen Klassert <steffen.klassert@secunet.com> > > > > This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated > > packets. Decapsulation happens at L2 and saves a full round through > > the stack for each packet. This is also needed to support HW offload > > for ESP in UDP encapsulation. > > > > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> > > Co-developed-by: Antony Antony <antony.antony@secunet.com> > > Signed-off-by: Antony Antony <antony.antony@secunet.com> > > --- > > include/net/gro.h | 2 +- > > include/net/xfrm.h | 4 ++ > > net/ipv4/esp4_offload.c | 6 ++- > > net/ipv4/udp.c | 16 ++++++- > > net/ipv4/xfrm4_input.c | 98 ++++++++++++++++++++++++++++++++--------- > > 5 files changed, 103 insertions(+), 23 deletions(-) > > > > diff --git a/include/net/gro.h b/include/net/gro.h > > index a4fab706240d..41c12c5d1ea1 100644 > > --- a/include/net/gro.h > > +++ b/include/net/gro.h > > @@ -29,7 +29,7 @@ struct napi_gro_cb { > > /* Number of segments aggregated. 
*/ > > u16 count; > > > > - /* Used in ipv6_gro_receive() and foo-over-udp */ > > + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ > > u16 proto; > > > > /* jiffies when first packet was created/queued */ > > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > > index 33ee3f5936e6..e980f442ddcd 100644 > > --- a/include/net/xfrm.h > > +++ b/include/net/xfrm.h > > @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu); > > int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); > > int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, > > int encap_type); > > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > > + struct sk_buff *skb); > > Why does this function need to be declared twice in this file? No need. Actually, the following patch removed it :) It is fixed in v5. > > > int xfrm4_transport_finish(struct sk_buff *skb, int async); > > int xfrm4_rcv(struct sk_buff *skb); > > > > @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); > > void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); > > int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); > > int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); > > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > > + struct sk_buff *skb); > > int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, > > int optlen); > > #else > > diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c > > index 77bb01032667..34ebfdf0e986 100644 > > --- a/net/ipv4/esp4_offload.c > > +++ b/net/ipv4/esp4_offload.c > > @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, > > int offset = skb_gro_offset(skb); > > struct xfrm_offload *xo; > > struct xfrm_state *x; > > + int encap_type = 0; > > __be32 seq; > > __be32 spi; > > > > @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, 
> > > > xo->flags |= XFRM_GRO; > > > > + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) > > + encap_type = UDP_ENCAP_ESPINUDP; > > + > > XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; > > XFRM_SPI_SKB_CB(skb)->family = AF_INET; > > XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); > > @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, > > > > /* We don't need to handle errors from xfrm_input, it does all > > * the error handling and frees the resources on error. */ > > - xfrm_input(skb, IPPROTO_ESP, spi, 0); > > + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); > > > > return ERR_PTR(-EINPROGRESS); > > out_reset: > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > > index aa32afd871ee..337607b17ebd 100644 > > --- a/net/ipv4/udp.c > > +++ b/net/ipv4/udp.c > > @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk) > > } > > } > > > > +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, > > + struct udp_sock *up) > > +{ > > +#ifdef CONFIG_XFRM > > + if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) { > > + if (family == AF_INET) > > + up->gro_receive = xfrm4_gro_udp_encap_rcv; > > + } > > +#endif > > +} > > + > > /* > > * Socket option code for UDP > > */ > > @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, > > case 0: > > #ifdef CONFIG_XFRM > > case UDP_ENCAP_ESPINUDP: > > + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up); > > + fallthrough; > > case UDP_ENCAP_ESPINUDP_NON_IKE: > > #if IS_ENABLED(CONFIG_IPV6) > > if (sk->sk_family == AF_INET6) > > up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; > > - else > > #endif > > + if (sk->sk_family == AF_INET) > > Why is this change needed? It is not necessary. I removed it in v5. 
> > > up->encap_rcv = xfrm4_udp_encap_rcv; > > #endif > > fallthrough; > > @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, > > udp_tunnel_encap_enable(sk->sk_socket); > > up->gro_enabled = valbool; > > up->accept_udp_l4 = valbool; > > + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up); > > release_sock(sk); > > break; > > > > diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c > > index ad2afeef4f10..b57f477c745e 100644 > > --- a/net/ipv4/xfrm4_input.c > > +++ b/net/ipv4/xfrm4_input.c > > @@ -17,6 +17,8 @@ > > #include <linux/netfilter_ipv4.h> > > #include <net/ip.h> > > #include <net/xfrm.h> > > +#include <net/protocol.h> > > +#include <net/gro.h> > > > > static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, > > struct sk_buff *skb) > > @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) > > return 0; > > } > > > > -/* If it's a keepalive packet, then just eat it. > > - * If it's an encapsulated packet, then pass it to the > > - * IPsec xfrm input. > > - * Returns 0 if skb passed to xfrm or was dropped. > > - * Returns >0 if skb should be passed to UDP. > > - * Returns <0 if skb should be resubmitted (-ret is protocol) > > - */ > > -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) > > { > > struct udp_sock *up = udp_sk(sk); > > struct udphdr *uh; > > @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > __be32 *udpdata32; > > __u16 encap_type = up->encap_type; > > > > - /* if this is not encapsulated socket, then just return now */ > > - if (!encap_type) > > + /* if unknown encap_type then just return now */ > > + if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE) > > This change is unclear to me - the patch adds support for GRO on > UDP_ENCAP_ESPINUDP. yes. > How can we now get other encap types here? 
and why wasn't the old condition ok? In the current code the old check is enough. I removed the new code in v5. > > > return 1; > > > > /* If this is a paged skb, make sure we pull up > > @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > case UDP_ENCAP_ESPINUDP: > > /* Check if this is a keepalive packet. If so, eat it. */ > > if (len == 1 && udpdata[0] == 0xff) { > > - goto drop; > > + return -EINVAL; > > } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { > > /* ESP Packet without Non-ESP header */ > > len = sizeof(struct udphdr); > > @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > case UDP_ENCAP_ESPINUDP_NON_IKE: > > /* Check if this is a keepalive packet. If so, eat it. */ > > if (len == 1 && udpdata[0] == 0xff) { > > - goto drop; > > + return -EINVAL; > > } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && > > udpdata32[0] == 0 && udpdata32[1] == 0) { > > > > @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > * protocol to ESP, and then call into the transform receiver. > > */ > > if (skb_unclone(skb, GFP_ATOMIC)) > > - goto drop; > > + return -EINVAL; > > > > /* Now we can update and verify the packet length... */ > > iph = ip_hdr(skb); > > @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > iph->tot_len = htons(ntohs(iph->tot_len) - len); > > if (skb->len < iphlen + len) { > > /* packet is too small!?! */ > > - goto drop; > > + return -EINVAL; > > } > > > > /* pull the data buffer up to the ESP header and set the > > * transport header to point to ESP. Keep UDP on the stack > > * for later. 
> > */ > > - __skb_pull(skb, len); > > - skb_reset_transport_header(skb); > > + if (pull) { > > + __skb_pull(skb, len); > > + skb_reset_transport_header(skb); > > + } else { > > + skb_set_transport_header(skb, len); > > + } > > > > /* process ESP */ > > - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); > > - > > -drop: > > - kfree_skb(skb); > > return 0; > > } > > > > +/* If it's a keepalive packet, then just eat it. > > + * If it's an encapsulated packet, then pass it to the > > + * IPsec xfrm input. > > + * Returns 0 if skb passed to xfrm or was dropped. > > + * Returns >0 if skb should be passed to UDP. > > + * Returns <0 if skb should be resubmitted (-ret is protocol) > > + */ > > +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) > > +{ > > + int ret; > > + > > + ret = __xfrm4_udp_encap_rcv(sk, skb, true); > > + if (!ret) > > + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, > > + udp_sk(sk)->encap_type); > > + > > + if (ret < 0) { > > + kfree_skb(skb); > > + return 0; > > + } > > + > > + return ret; > > +} > > + > > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, > > + struct sk_buff *skb) > > +{ > > + int offset = skb_gro_offset(skb); > > + const struct net_offload *ops; > > + struct sk_buff *pp = NULL; > > + int ret; > > + > > + offset = offset - sizeof(struct udphdr); > > + > > + if (!pskb_pull(skb, offset)) > > + return NULL; > > + > > + rcu_read_lock(); > > + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); > > + if (!ops || !ops->callbacks.gro_receive) > > + goto out; > > + > > + ret = __xfrm4_udp_encap_rcv(sk, skb, false); > > + if (ret) > > + goto out; > > + > > + skb_push(skb, offset); > > + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; > > + > > + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); > > + rcu_read_unlock(); > > + > > + return pp; > > + > > +out: > > + rcu_read_unlock(); > > + skb_push(skb, offset); > > + NAPI_GRO_CB(skb)->same_flow = 0; > > + NAPI_GRO_CB(skb)->flush = 1; > > + > 
> + return NULL; > > +} > > + > > int xfrm4_rcv(struct sk_buff *skb) > > { > > return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); > > -- > > 2.30.2 > >
diff --git a/include/net/gro.h b/include/net/gro.h index a4fab706240d..41c12c5d1ea1 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -29,7 +29,7 @@ struct napi_gro_cb { /* Number of segments aggregated. */ u16 count; - /* Used in ipv6_gro_receive() and foo-over-udp */ + /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ u16 proto; /* jiffies when first packet was created/queued */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 33ee3f5936e6..e980f442ddcd 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb); @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu); int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb); int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen); #else diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 77bb01032667..34ebfdf0e986 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo->flags |= XFRM_GRO; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = 
UDP_ENCAP_ESPINUDP; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, 0); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa32afd871ee..337607b17ebd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk) } } +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, + struct udp_sock *up) +{ +#ifdef CONFIG_XFRM + if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) { + if (family == AF_INET) + up->gro_receive = xfrm4_gro_udp_encap_rcv; + } +#endif +} + /* * Socket option code for UDP */ @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up); + fallthrough; case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv; - else #endif + if (sk->sk_family == AF_INET) up->encap_rcv = xfrm4_udp_encap_rcv; #endif fallthrough; @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, udp_tunnel_encap_enable(sk->sk_socket); up->gro_enabled = valbool; up->accept_udp_l4 = valbool; + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up); release_sock(sk); break; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad2afeef4f10..b57f477c745e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -17,6 +17,8 @@ #include <linux/netfilter_ipv4.h> #include <net/ip.h> #include <net/xfrm.h> +#include 
<net/protocol.h> +#include <net/gro.h> static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. - * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; __u16 encap_type = up->encap_type; - /* if this is not encapsulated socket, then just return now */ - if (!encap_type) + /* if unknown encap_type then just return now */ + if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE) return 1; /* If this is a paged skb, make sure we pull up @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. 
*/ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ iph = ip_hdr(skb); @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(ntohs(iph->tot_len) - len); if (skb->len < iphlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. 
+ * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + ret = __xfrm4_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} + +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm4_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);