diff mbox series

[v4,ipsec-next,2/3] xfrm: Support GRO for IPv4 ESP in UDP encapsulation.

Message ID 36dc1203db9169f553797a6e2d2a46265f19dd8b.1692172297.git.antony.antony@secunet.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series xfrm: Support GRO decapsulation for ESP in UDP encapsulation | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Antony Antony Aug. 16, 2023, 9:57 a.m. UTC
From: Steffen Klassert <steffen.klassert@secunet.com>

This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
packets. Decapsulation happens at L2 and saves a full round through
the stack for each packet. This is also needed to support HW offload
for ESP in UDP encapsulation.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Co-developed-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 include/net/gro.h       |  2 +-
 include/net/xfrm.h      |  4 ++
 net/ipv4/esp4_offload.c |  6 ++-
 net/ipv4/udp.c          | 16 ++++++-
 net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
 5 files changed, 103 insertions(+), 23 deletions(-)

--
2.30.2

Comments

Eyal Birger Aug. 16, 2023, 11:15 a.m. UTC | #1
Hi Antony,

On Wed, Aug 16, 2023 at 12:57 PM Antony Antony
<antony.antony@secunet.com> wrote:
>
> From: Steffen Klassert <steffen.klassert@secunet.com>
>
> This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> packets. Decapsulation happens at L2 and saves a full round through
> the stack for each packet. This is also needed to support HW offload
> for ESP in UDP encapsulation.
>
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> Co-developed-by: Antony Antony <antony.antony@secunet.com>
> Signed-off-by: Antony Antony <antony.antony@secunet.com>
> ---
>  include/net/gro.h       |  2 +-
>  include/net/xfrm.h      |  4 ++
>  net/ipv4/esp4_offload.c |  6 ++-
>  net/ipv4/udp.c          | 16 ++++++-
>  net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
>  5 files changed, 103 insertions(+), 23 deletions(-)
>
> diff --git a/include/net/gro.h b/include/net/gro.h
> index a4fab706240d..41c12c5d1ea1 100644
> --- a/include/net/gro.h
> +++ b/include/net/gro.h
> @@ -29,7 +29,7 @@ struct napi_gro_cb {
>         /* Number of segments aggregated. */
>         u16     count;
>
> -       /* Used in ipv6_gro_receive() and foo-over-udp */
> +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
>         u16     proto;
>
>         /* jiffies when first packet was created/queued */
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 33ee3f5936e6..e980f442ddcd 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
>  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
>  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
>                     int encap_type);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);

Why does this function need to be declared twice in this file?

>  int xfrm4_transport_finish(struct sk_buff *skb, int async);
>  int xfrm4_rcv(struct sk_buff *skb);
>
> @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
>  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
>  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
>  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb);
>  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
>                      int optlen);
>  #else
> diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> index 77bb01032667..34ebfdf0e986 100644
> --- a/net/ipv4/esp4_offload.c
> +++ b/net/ipv4/esp4_offload.c
> @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>         int offset = skb_gro_offset(skb);
>         struct xfrm_offload *xo;
>         struct xfrm_state *x;
> +       int encap_type = 0;
>         __be32 seq;
>         __be32 spi;
>
> @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         xo->flags |= XFRM_GRO;
>
> +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
> +               encap_type = UDP_ENCAP_ESPINUDP;
> +
>         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
>         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
>         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
>
>         /* We don't need to handle errors from xfrm_input, it does all
>          * the error handling and frees the resources on error. */
> -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
>
>         return ERR_PTR(-EINPROGRESS);
>  out_reset:
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index aa32afd871ee..337607b17ebd 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
>         }
>  }
>
> +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
> +                                      struct udp_sock *up)
> +{
> +#ifdef CONFIG_XFRM
> +       if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
> +               if (family == AF_INET)
> +                       up->gro_receive = xfrm4_gro_udp_encap_rcv;
> +       }
> +#endif
> +}
> +
>  /*
>   *     Socket option code for UDP
>   */
> @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>                 case 0:
>  #ifdef CONFIG_XFRM
>                 case UDP_ENCAP_ESPINUDP:
> +                       set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
> +                       fallthrough;
>                 case UDP_ENCAP_ESPINUDP_NON_IKE:
>  #if IS_ENABLED(CONFIG_IPV6)
>                         if (sk->sk_family == AF_INET6)
>                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> -                       else
>  #endif
> +                       if (sk->sk_family == AF_INET)

Why is this change needed?

>                                 up->encap_rcv = xfrm4_udp_encap_rcv;
>  #endif
>                         fallthrough;
> @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
>                         udp_tunnel_encap_enable(sk->sk_socket);
>                 up->gro_enabled = valbool;
>                 up->accept_udp_l4 = valbool;
> +               set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
>                 release_sock(sk);
>                 break;
>
> diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> index ad2afeef4f10..b57f477c745e 100644
> --- a/net/ipv4/xfrm4_input.c
> +++ b/net/ipv4/xfrm4_input.c
> @@ -17,6 +17,8 @@
>  #include <linux/netfilter_ipv4.h>
>  #include <net/ip.h>
>  #include <net/xfrm.h>
> +#include <net/protocol.h>
> +#include <net/gro.h>
>
>  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
>                                    struct sk_buff *skb)
> @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
>         return 0;
>  }
>
> -/* If it's a keepalive packet, then just eat it.
> - * If it's an encapsulated packet, then pass it to the
> - * IPsec xfrm input.
> - * Returns 0 if skb passed to xfrm or was dropped.
> - * Returns >0 if skb should be passed to UDP.
> - * Returns <0 if skb should be resubmitted (-ret is protocol)
> - */
> -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
>  {
>         struct udp_sock *up = udp_sk(sk);
>         struct udphdr *uh;
> @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         __be32 *udpdata32;
>         __u16 encap_type = up->encap_type;
>
> -       /* if this is not encapsulated socket, then just return now */
> -       if (!encap_type)
> +       /* if unknown encap_type then just return now */
> +       if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)

This change is unclear to me - the patch adds support for GRO on
UDP_ENCAP_ESPINUDP.
How can we now get other encap types here? and why wasn't the old condition ok?

>                 return 1;
>
>         /* If this is a paged skb, make sure we pull up
> @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
>                         /* ESP Packet without Non-ESP header */
>                         len = sizeof(struct udphdr);
> @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         case UDP_ENCAP_ESPINUDP_NON_IKE:
>                 /* Check if this is a keepalive packet.  If so, eat it. */
>                 if (len == 1 && udpdata[0] == 0xff) {
> -                       goto drop;
> +                       return -EINVAL;
>                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
>                            udpdata32[0] == 0 && udpdata32[1] == 0) {
>
> @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>          * protocol to ESP, and then call into the transform receiver.
>          */
>         if (skb_unclone(skb, GFP_ATOMIC))
> -               goto drop;
> +               return -EINVAL;
>
>         /* Now we can update and verify the packet length... */
>         iph = ip_hdr(skb);
> @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
>         iph->tot_len = htons(ntohs(iph->tot_len) - len);
>         if (skb->len < iphlen + len) {
>                 /* packet is too small!?! */
> -               goto drop;
> +               return -EINVAL;
>         }
>
>         /* pull the data buffer up to the ESP header and set the
>          * transport header to point to ESP.  Keep UDP on the stack
>          * for later.
>          */
> -       __skb_pull(skb, len);
> -       skb_reset_transport_header(skb);
> +       if (pull) {
> +               __skb_pull(skb, len);
> +               skb_reset_transport_header(skb);
> +       } else {
> +               skb_set_transport_header(skb, len);
> +       }
>
>         /* process ESP */
> -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> -
> -drop:
> -       kfree_skb(skb);
>         return 0;
>  }
>
> +/* If it's a keepalive packet, then just eat it.
> + * If it's an encapsulated packet, then pass it to the
> + * IPsec xfrm input.
> + * Returns 0 if skb passed to xfrm or was dropped.
> + * Returns >0 if skb should be passed to UDP.
> + * Returns <0 if skb should be resubmitted (-ret is protocol)
> + */
> +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> +{
> +       int ret;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> +       if (!ret)
> +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> +                                      udp_sk(sk)->encap_type);
> +
> +       if (ret < 0) {
> +               kfree_skb(skb);
> +               return 0;
> +       }
> +
> +       return ret;
> +}
> +
> +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> +                                       struct sk_buff *skb)
> +{
> +       int offset = skb_gro_offset(skb);
> +       const struct net_offload *ops;
> +       struct sk_buff *pp = NULL;
> +       int ret;
> +
> +       offset = offset - sizeof(struct udphdr);
> +
> +       if (!pskb_pull(skb, offset))
> +               return NULL;
> +
> +       rcu_read_lock();
> +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> +       if (!ops || !ops->callbacks.gro_receive)
> +               goto out;
> +
> +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> +       if (ret)
> +               goto out;
> +
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> +
> +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> +       rcu_read_unlock();
> +
> +       return pp;
> +
> +out:
> +       rcu_read_unlock();
> +       skb_push(skb, offset);
> +       NAPI_GRO_CB(skb)->same_flow = 0;
> +       NAPI_GRO_CB(skb)->flush = 1;
> +
> +       return NULL;
> +}
> +
>  int xfrm4_rcv(struct sk_buff *skb)
>  {
>         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> --
> 2.30.2
>
Antony Antony Aug. 16, 2023, 4:59 p.m. UTC | #2
Hi Eyal,

Thanks for your quick review. I have addressed the points you raised for
both v4 and send v5 patches.

On Wed, Aug 16, 2023 at 14:15:01 +0300, Eyal Birger wrote:
> Hi Antony,
> 
> On Wed, Aug 16, 2023 at 12:57 PM Antony Antony
> <antony.antony@secunet.com> wrote:
> >
> > From: Steffen Klassert <steffen.klassert@secunet.com>
> >
> > This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated
> > packets. Decapsulation happens at L2 and saves a full round through
> > the stack for each packet. This is also needed to support HW offload
> > for ESP in UDP encapsulation.
> >
> > Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> > Co-developed-by: Antony Antony <antony.antony@secunet.com>
> > Signed-off-by: Antony Antony <antony.antony@secunet.com>
> > ---
> >  include/net/gro.h       |  2 +-
> >  include/net/xfrm.h      |  4 ++
> >  net/ipv4/esp4_offload.c |  6 ++-
> >  net/ipv4/udp.c          | 16 ++++++-
> >  net/ipv4/xfrm4_input.c  | 98 ++++++++++++++++++++++++++++++++---------
> >  5 files changed, 103 insertions(+), 23 deletions(-)
> >
> > diff --git a/include/net/gro.h b/include/net/gro.h
> > index a4fab706240d..41c12c5d1ea1 100644
> > --- a/include/net/gro.h
> > +++ b/include/net/gro.h
> > @@ -29,7 +29,7 @@ struct napi_gro_cb {
> >         /* Number of segments aggregated. */
> >         u16     count;
> >
> > -       /* Used in ipv6_gro_receive() and foo-over-udp */
> > +       /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
> >         u16     proto;
> >
> >         /* jiffies when first packet was created/queued */
> > diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> > index 33ee3f5936e6..e980f442ddcd 100644
> > --- a/include/net/xfrm.h
> > +++ b/include/net/xfrm.h
> > @@ -1671,6 +1671,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu);
> >  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
> >  int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
> >                     int encap_type);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb);
> 
> Why does this function need to be declared twice in this file?

no need. Actully the following patch was removed it:) It is fixed in v5.

> 
> >  int xfrm4_transport_finish(struct sk_buff *skb, int async);
> >  int xfrm4_rcv(struct sk_buff *skb);
> >
> > @@ -1711,6 +1713,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
> >  void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
> >  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> >  int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb);
> >  int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
> >                      int optlen);
> >  #else
> > diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
> > index 77bb01032667..34ebfdf0e986 100644
> > --- a/net/ipv4/esp4_offload.c
> > +++ b/net/ipv4/esp4_offload.c
> > @@ -32,6 +32,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >         int offset = skb_gro_offset(skb);
> >         struct xfrm_offload *xo;
> >         struct xfrm_state *x;
> > +       int encap_type = 0;
> >         __be32 seq;
> >         __be32 spi;
> >
> > @@ -69,6 +70,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> >         xo->flags |= XFRM_GRO;
> >
> > +       if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
> > +               encap_type = UDP_ENCAP_ESPINUDP;
> > +
> >         XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
> >         XFRM_SPI_SKB_CB(skb)->family = AF_INET;
> >         XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
> > @@ -76,7 +80,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
> >
> >         /* We don't need to handle errors from xfrm_input, it does all
> >          * the error handling and frees the resources on error. */
> > -       xfrm_input(skb, IPPROTO_ESP, spi, 0);
> > +       xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
> >
> >         return ERR_PTR(-EINPROGRESS);
> >  out_reset:
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index aa32afd871ee..337607b17ebd 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -2681,6 +2681,17 @@ void udp_destroy_sock(struct sock *sk)
> >         }
> >  }
> >
> > +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
> > +                                      struct udp_sock *up)
> > +{
> > +#ifdef CONFIG_XFRM
> > +       if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
> > +               if (family == AF_INET)
> > +                       up->gro_receive = xfrm4_gro_udp_encap_rcv;
> > +       }
> > +#endif
> > +}
> > +
> >  /*
> >   *     Socket option code for UDP
> >   */
> > @@ -2730,12 +2741,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> >                 case 0:
> >  #ifdef CONFIG_XFRM
> >                 case UDP_ENCAP_ESPINUDP:
> > +                       set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
> > +                       fallthrough;
> >                 case UDP_ENCAP_ESPINUDP_NON_IKE:
> >  #if IS_ENABLED(CONFIG_IPV6)
> >                         if (sk->sk_family == AF_INET6)
> >                                 up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
> > -                       else
> >  #endif
> > +                       if (sk->sk_family == AF_INET)
> 
> Why is this change needed?

It is not necessary. I removed it in v5.

> 
> >                                 up->encap_rcv = xfrm4_udp_encap_rcv;
> >  #endif
> >                         fallthrough;
> > @@ -2773,6 +2786,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
> >                         udp_tunnel_encap_enable(sk->sk_socket);
> >                 up->gro_enabled = valbool;
> >                 up->accept_udp_l4 = valbool;
> > +               set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
> >                 release_sock(sk);
> >                 break;
> >
> > diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
> > index ad2afeef4f10..b57f477c745e 100644
> > --- a/net/ipv4/xfrm4_input.c
> > +++ b/net/ipv4/xfrm4_input.c
> > @@ -17,6 +17,8 @@
> >  #include <linux/netfilter_ipv4.h>
> >  #include <net/ip.h>
> >  #include <net/xfrm.h>
> > +#include <net/protocol.h>
> > +#include <net/gro.h>
> >
> >  static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
> >                                    struct sk_buff *skb)
> > @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
> >         return 0;
> >  }
> >
> > -/* If it's a keepalive packet, then just eat it.
> > - * If it's an encapsulated packet, then pass it to the
> > - * IPsec xfrm input.
> > - * Returns 0 if skb passed to xfrm or was dropped.
> > - * Returns >0 if skb should be passed to UDP.
> > - * Returns <0 if skb should be resubmitted (-ret is protocol)
> > - */
> > -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
> >  {
> >         struct udp_sock *up = udp_sk(sk);
> >         struct udphdr *uh;
> > @@ -90,8 +85,8 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         __be32 *udpdata32;
> >         __u16 encap_type = up->encap_type;
> >
> > -       /* if this is not encapsulated socket, then just return now */
> > -       if (!encap_type)
> > +       /* if unknown encap_type then just return now */
> > +       if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
> 
> This change is unclear to me - the patch adds support for GRO on
> UDP_ENCAP_ESPINUDP.

yes.
> How can we now get other encap types here? and why wasn't the old condition ok?

In the current code the old check is enoguh. I removed new code in v5.

> 
> >                 return 1;
> >
> >         /* If this is a paged skb, make sure we pull up
> > @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         case UDP_ENCAP_ESPINUDP:
> >                 /* Check if this is a keepalive packet.  If so, eat it. */
> >                 if (len == 1 && udpdata[0] == 0xff) {
> > -                       goto drop;
> > +                       return -EINVAL;
> >                 } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
> >                         /* ESP Packet without Non-ESP header */
> >                         len = sizeof(struct udphdr);
> > @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         case UDP_ENCAP_ESPINUDP_NON_IKE:
> >                 /* Check if this is a keepalive packet.  If so, eat it. */
> >                 if (len == 1 && udpdata[0] == 0xff) {
> > -                       goto drop;
> > +                       return -EINVAL;
> >                 } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
> >                            udpdata32[0] == 0 && udpdata32[1] == 0) {
> >
> > @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >          * protocol to ESP, and then call into the transform receiver.
> >          */
> >         if (skb_unclone(skb, GFP_ATOMIC))
> > -               goto drop;
> > +               return -EINVAL;
> >
> >         /* Now we can update and verify the packet length... */
> >         iph = ip_hdr(skb);
> > @@ -147,24 +142,87 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> >         iph->tot_len = htons(ntohs(iph->tot_len) - len);
> >         if (skb->len < iphlen + len) {
> >                 /* packet is too small!?! */
> > -               goto drop;
> > +               return -EINVAL;
> >         }
> >
> >         /* pull the data buffer up to the ESP header and set the
> >          * transport header to point to ESP.  Keep UDP on the stack
> >          * for later.
> >          */
> > -       __skb_pull(skb, len);
> > -       skb_reset_transport_header(skb);
> > +       if (pull) {
> > +               __skb_pull(skb, len);
> > +               skb_reset_transport_header(skb);
> > +       } else {
> > +               skb_set_transport_header(skb, len);
> > +       }
> >
> >         /* process ESP */
> > -       return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
> > -
> > -drop:
> > -       kfree_skb(skb);
> >         return 0;
> >  }
> >
> > +/* If it's a keepalive packet, then just eat it.
> > + * If it's an encapsulated packet, then pass it to the
> > + * IPsec xfrm input.
> > + * Returns 0 if skb passed to xfrm or was dropped.
> > + * Returns >0 if skb should be passed to UDP.
> > + * Returns <0 if skb should be resubmitted (-ret is protocol)
> > + */
> > +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
> > +{
> > +       int ret;
> > +
> > +       ret = __xfrm4_udp_encap_rcv(sk, skb, true);
> > +       if (!ret)
> > +               return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
> > +                                      udp_sk(sk)->encap_type);
> > +
> > +       if (ret < 0) {
> > +               kfree_skb(skb);
> > +               return 0;
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
> > +                                       struct sk_buff *skb)
> > +{
> > +       int offset = skb_gro_offset(skb);
> > +       const struct net_offload *ops;
> > +       struct sk_buff *pp = NULL;
> > +       int ret;
> > +
> > +       offset = offset - sizeof(struct udphdr);
> > +
> > +       if (!pskb_pull(skb, offset))
> > +               return NULL;
> > +
> > +       rcu_read_lock();
> > +       ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
> > +       if (!ops || !ops->callbacks.gro_receive)
> > +               goto out;
> > +
> > +       ret = __xfrm4_udp_encap_rcv(sk, skb, false);
> > +       if (ret)
> > +               goto out;
> > +
> > +       skb_push(skb, offset);
> > +       NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
> > +
> > +       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
> > +       rcu_read_unlock();
> > +
> > +       return pp;
> > +
> > +out:
> > +       rcu_read_unlock();
> > +       skb_push(skb, offset);
> > +       NAPI_GRO_CB(skb)->same_flow = 0;
> > +       NAPI_GRO_CB(skb)->flush = 1;
> > +
> > +       return NULL;
> > +}
> > +
> >  int xfrm4_rcv(struct sk_buff *skb)
> >  {
> >         return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
> > --
> > 2.30.2
> >
diff mbox series

Patch

diff --git a/include/net/gro.h b/include/net/gro.h
index a4fab706240d..41c12c5d1ea1 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -29,7 +29,7 @@  struct napi_gro_cb {
 	/* Number of segments aggregated. */
 	u16	count;

-	/* Used in ipv6_gro_receive() and foo-over-udp */
+	/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
 	u16	proto;

 	/* jiffies when first packet was created/queued */
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 33ee3f5936e6..e980f442ddcd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1671,6 +1671,8 @@  void xfrm_local_error(struct sk_buff *skb, int mtu);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm4_transport_finish(struct sk_buff *skb, int async);
 int xfrm4_rcv(struct sk_buff *skb);

@@ -1711,6 +1713,8 @@  int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
 		     int optlen);
 #else
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 77bb01032667..34ebfdf0e986 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -32,6 +32,7 @@  static struct sk_buff *esp4_gro_receive(struct list_head *head,
 	int offset = skb_gro_offset(skb);
 	struct xfrm_offload *xo;
 	struct xfrm_state *x;
+	int encap_type = 0;
 	__be32 seq;
 	__be32 spi;

@@ -69,6 +70,9 @@  static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	xo->flags |= XFRM_GRO;

+	if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+		encap_type = UDP_ENCAP_ESPINUDP;
+
 	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -76,7 +80,7 @@  static struct sk_buff *esp4_gro_receive(struct list_head *head,

 	/* We don't need to handle errors from xfrm_input, it does all
 	 * the error handling and frees the resources on error. */
-	xfrm_input(skb, IPPROTO_ESP, spi, 0);
+	xfrm_input(skb, IPPROTO_ESP, spi, encap_type);

 	return ERR_PTR(-EINPROGRESS);
 out_reset:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index aa32afd871ee..337607b17ebd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2681,6 +2681,17 @@  void udp_destroy_sock(struct sock *sk)
 	}
 }

+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+				       struct udp_sock *up)
+{
+#ifdef CONFIG_XFRM
+	if (up->gro_enabled && encap_type == UDP_ENCAP_ESPINUDP) {
+		if (family == AF_INET)
+			up->gro_receive = xfrm4_gro_udp_encap_rcv;
+	}
+#endif
+}
+
 /*
  *	Socket option code for UDP
  */
@@ -2730,12 +2741,14 @@  int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		case 0:
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
+			set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, up);
+			fallthrough;
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 #if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == AF_INET6)
 				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
-			else
 #endif
+			if (sk->sk_family == AF_INET)
 				up->encap_rcv = xfrm4_udp_encap_rcv;
 #endif
 			fallthrough;
@@ -2773,6 +2786,7 @@  int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			udp_tunnel_encap_enable(sk->sk_socket);
 		up->gro_enabled = valbool;
 		up->accept_udp_l4 = valbool;
+		set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, up);
 		release_sock(sk);
 		break;

diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad2afeef4f10..b57f477c745e 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@ 
 #include <linux/netfilter_ipv4.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>

 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
@@ -72,14 +74,7 @@  int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	return 0;
 }

-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
 {
 	struct udp_sock *up = udp_sk(sk);
 	struct udphdr *uh;
@@ -90,8 +85,8 @@  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	__be32 *udpdata32;
 	__u16 encap_type = up->encap_type;

-	/* if this is not encapsulated socket, then just return now */
-	if (!encap_type)
+	/* if unknown encap_type then just return now */
+	if (encap_type != UDP_ENCAP_ESPINUDP && encap_type != UDP_ENCAP_ESPINUDP_NON_IKE)
 		return 1;

 	/* If this is a paged skb, make sure we pull up
@@ -110,7 +105,7 @@  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
-			goto drop;
+			return -EINVAL;
 		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
 			   udpdata32[0] == 0 && udpdata32[1] == 0) {

@@ -139,7 +134,7 @@  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	 * protocol to ESP, and then call into the transform receiver.
 	 */
 	if (skb_unclone(skb, GFP_ATOMIC))
-		goto drop;
+		return -EINVAL;

 	/* Now we can update and verify the packet length... */
 	iph = ip_hdr(skb);
@@ -147,24 +142,87 @@  int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
 		/* packet is too small!?! */
-		goto drop;
+		return -EINVAL;
 	}

 	/* pull the data buffer up to the ESP header and set the
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	__skb_pull(skb, len);
-	skb_reset_transport_header(skb);
+	if (pull) {
+		__skb_pull(skb, len);
+		skb_reset_transport_header(skb);
+	} else {
+		skb_set_transport_header(skb, len);
+	}

 	/* process ESP */
-	return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
-	kfree_skb(skb);
 	return 0;
 }

+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+				       udp_sk(sk)->encap_type);
+
+	if (ret < 0) {
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+					struct sk_buff *skb)
+{
+	int offset = skb_gro_offset(skb);
+	const struct net_offload *ops;
+	struct sk_buff *pp = NULL;
+	int ret;
+
+	offset = offset - sizeof(struct udphdr);
+
+	if (!pskb_pull(skb, offset))
+		return NULL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+	if (!ops || !ops->callbacks.gro_receive)
+		goto out;
+
+	ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+	if (ret)
+		goto out;
+
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+	pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+	rcu_read_unlock();
+
+	return pp;
+
+out:
+	rcu_read_unlock();
+	skb_push(skb, offset);
+	NAPI_GRO_CB(skb)->same_flow = 0;
+	NAPI_GRO_CB(skb)->flush = 1;
+
+	return NULL;
+}
+
 int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);