Message ID | 20220203115941.3107572-2-toshiaki.makita1@gmail.com (mailing list archive) |
---|---|
State | Awaiting Upstream |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Conntrack GRE offload | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for net-next |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/subject_prefix | success | Link |
netdev/cover_letter | success | Series has a cover letter |
netdev/patch_count | success | Link |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | success | Errors and warnings before: 38 this patch: 38 |
netdev/cc_maintainers | success | CCed 8 of 8 maintainers |
netdev/build_clang | success | Errors and warnings before: 0 this patch: 0 |
netdev/module_param | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 38 this patch: 38 |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 180 lines checked |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: > Support GREv0 without NAT. > > Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com> > --- > net/netfilter/nf_flow_table_core.c | 10 +++++-- > net/netfilter/nf_flow_table_ip.c | 54 ++++++++++++++++++++++++++++------- > net/netfilter/nf_flow_table_offload.c | 19 +++++++----- > net/netfilter/nft_flow_offload.c | 13 +++++++++ > 4 files changed, 77 insertions(+), 19 deletions(-) > > diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c > index b90eca7..e66a375 100644 > --- a/net/netfilter/nf_flow_table_core.c > +++ b/net/netfilter/nf_flow_table_core.c > @@ -39,8 +39,14 @@ > > ft->l3proto = ctt->src.l3num; > ft->l4proto = ctt->dst.protonum; > - ft->src_port = ctt->src.u.tcp.port; > - ft->dst_port = ctt->dst.u.tcp.port; > + > + switch (ctt->dst.protonum) { > + case IPPROTO_TCP: > + case IPPROTO_UDP: > + ft->src_port = ctt->src.u.tcp.port; > + ft->dst_port = ctt->dst.u.tcp.port; > + break; > + } > } > > struct flow_offload *flow_offload_alloc(struct nf_conn *ct) > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c > index 889cf88..48e2f58 100644 > --- a/net/netfilter/nf_flow_table_ip.c > +++ b/net/netfilter/nf_flow_table_ip.c > @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, > struct flow_ports *ports; > unsigned int thoff; > struct iphdr *iph; > + u8 ipproto; > > if (!pskb_may_pull(skb, sizeof(*iph) + offset)) > return -1; > @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, > > thoff += offset; > > - switch (iph->protocol) { > + ipproto = iph->protocol; > + switch (ipproto) { > case IPPROTO_TCP: > *hdrsize = sizeof(struct tcphdr); > break; > case IPPROTO_UDP: > *hdrsize = sizeof(struct udphdr); > break; > +#ifdef CONFIG_NF_CT_PROTO_GRE > + case IPPROTO_GRE: > + *hdrsize = sizeof(struct gre_base_hdr); > + break; > +#endif > default: > return -1; > } > @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, > if (!pskb_may_pull(skb, thoff + *hdrsize)) > return -1; > > + if (ipproto == IPPROTO_GRE) { No ifdef here? Maybe remove these ifdef everywhere? > + struct gre_base_hdr *greh; > + > + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); > + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) > + return -1; > + } > + > iph = (struct iphdr *)(skb_network_header(skb) + offset); > - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > > tuple->src_v4.s_addr = iph->saddr; > tuple->dst_v4.s_addr = iph->daddr; > - tuple->src_port = ports->source; > - tuple->dst_port = ports->dest; > + if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) { > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + } maybe: switch (ipproto) { case IPPROTO_TCP: case IPPROTO_UDP: ... break; case IPPROTO_GRE: break; } ? > tuple->l3proto = AF_INET; > - tuple->l4proto = iph->protocol; > + tuple->l4proto = ipproto; > tuple->iifidx = dev->ifindex; > nf_flow_tuple_encap(skb, tuple); > > @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, > struct flow_ports *ports; > struct ipv6hdr *ip6h; > unsigned int thoff; > + u8 nexthdr; > > thoff = sizeof(*ip6h) + offset; > if (!pskb_may_pull(skb, thoff)) > @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, > > ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); > > - switch (ip6h->nexthdr) { > + nexthdr = ip6h->nexthdr; > + switch (nexthdr) { > case IPPROTO_TCP: > *hdrsize = sizeof(struct tcphdr); > break; > case IPPROTO_UDP: > *hdrsize = sizeof(struct udphdr); > break; > +#ifdef CONFIG_NF_CT_PROTO_GRE > + case IPPROTO_GRE: > + *hdrsize = sizeof(struct gre_base_hdr); > + break; > +#endif > default: > return -1; > } > @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, > if (!pskb_may_pull(skb, thoff + *hdrsize)) > return -1; > > + if (nexthdr == IPPROTO_GRE) { > + struct gre_base_hdr *greh; > + > + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); > + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) > + return -1; > + } > + > ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); > - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > > tuple->src_v6 = ip6h->saddr; > tuple->dst_v6 = ip6h->daddr; > - tuple->src_port = ports->source; > - tuple->dst_port = ports->dest; > + if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) { > + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); > + tuple->src_port = ports->source; > + tuple->dst_port = ports->dest; > + } > tuple->l3proto = AF_INET6; > - tuple->l4proto = ip6h->nexthdr; > + tuple->l4proto = nexthdr; > tuple->iifidx = dev->ifindex; > nf_flow_tuple_encap(skb, tuple); > > diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c > index b561e0a..9b81080 100644 > --- a/net/netfilter/nf_flow_table_offload.c > +++ b/net/netfilter/nf_flow_table_offload.c > @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, > match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); > break; > case IPPROTO_UDP: > + case IPPROTO_GRE: > break; > default: > return -EOPNOTSUPP; > @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match, > key->basic.ip_proto = tuple->l4proto; > mask->basic.ip_proto = 0xff; > > - key->tp.src = tuple->src_port; > - mask->tp.src = 0xffff; > - key->tp.dst = tuple->dst_port; > - mask->tp.dst = 0xffff; > - > match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | > BIT(FLOW_DISSECTOR_KEY_CONTROL) | > - BIT(FLOW_DISSECTOR_KEY_BASIC) | > - BIT(FLOW_DISSECTOR_KEY_PORTS); > + BIT(FLOW_DISSECTOR_KEY_BASIC); > + > + if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) { > + key->tp.src = tuple->src_port; > + mask->tp.src = 0xffff; > + key->tp.dst = tuple->dst_port; > + mask->tp.dst = 0xffff; > + > + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS); > + } > + > return 0; > } > > diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c > index 0af34ad..731b5d8 100644 > --- a/net/netfilter/nft_flow_offload.c > +++ b/net/netfilter/nft_flow_offload.c > @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, > break; > case IPPROTO_UDP: > break; > +#ifdef CONFIG_NF_CT_PROTO_GRE > + case IPPROTO_GRE: { > + struct nf_conntrack_tuple *tuple; > + > + if (ct->status & IPS_NAT_MASK) > + goto out; Why this NAT check? > + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; > + /* No support for GRE v1 */ > + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) > + goto out; > + break; > + } > +#endif > default: > goto out; > } > -- > 1.8.3.1 >
On 2022/02/08 2:56, Pablo Neira Ayuso wrote: > On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: >> Support GREv0 without NAT. >> >> Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com> >> --- >> net/netfilter/nf_flow_table_core.c | 10 +++++-- >> net/netfilter/nf_flow_table_ip.c | 54 ++++++++++++++++++++++++++++------- >> net/netfilter/nf_flow_table_offload.c | 19 +++++++----- >> net/netfilter/nft_flow_offload.c | 13 +++++++++ >> 4 files changed, 77 insertions(+), 19 deletions(-) >> >> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c >> index b90eca7..e66a375 100644 >> --- a/net/netfilter/nf_flow_table_core.c >> +++ b/net/netfilter/nf_flow_table_core.c >> @@ -39,8 +39,14 @@ >> >> ft->l3proto = ctt->src.l3num; >> ft->l4proto = ctt->dst.protonum; >> - ft->src_port = ctt->src.u.tcp.port; >> - ft->dst_port = ctt->dst.u.tcp.port; >> + >> + switch (ctt->dst.protonum) { >> + case IPPROTO_TCP: >> + case IPPROTO_UDP: >> + ft->src_port = ctt->src.u.tcp.port; >> + ft->dst_port = ctt->dst.u.tcp.port; >> + break; >> + } >> } >> >> struct flow_offload *flow_offload_alloc(struct nf_conn *ct) >> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c >> index 889cf88..48e2f58 100644 >> --- a/net/netfilter/nf_flow_table_ip.c >> +++ b/net/netfilter/nf_flow_table_ip.c >> @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, >> struct flow_ports *ports; >> unsigned int thoff; >> struct iphdr *iph; >> + u8 ipproto; >> >> if (!pskb_may_pull(skb, sizeof(*iph) + offset)) >> return -1; >> @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, >> >> thoff += offset; >> >> - switch (iph->protocol) { >> + ipproto = iph->protocol; >> + switch (ipproto) { >> case IPPROTO_TCP: >> *hdrsize = sizeof(struct tcphdr); >> break; >> case IPPROTO_UDP: >> *hdrsize = sizeof(struct udphdr); >> break; >> +#ifdef CONFIG_NF_CT_PROTO_GRE >> + case IPPROTO_GRE: >> + *hdrsize = sizeof(struct gre_base_hdr); >> + break; >> +#endif >> default: >> return -1; >> } >> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, >> if (!pskb_may_pull(skb, thoff + *hdrsize)) >> return -1; >> >> + if (ipproto == IPPROTO_GRE) { > > No ifdef here? Maybe remove these ifdef everywhere? I wanted to avoid adding many ifdefs and I expect this to be compiled out when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather may have been unintuitive though. Removing all of these ifdefs will cause inconsistent behavior between CONFIG_NF_CT_PROTO_GRE=n/y. When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus it will track GREv1 without key infomation, and the flow will be offloaded. When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not be offloaded. I wanted to just refuse offloading of GRE to avoid this inconsistency. Anyway this kind of inconsistency seems to happen in software conntrack, so if you'd like to remove ifdefs, I will do. >> + struct gre_base_hdr *greh; >> + >> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); >> + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) >> + return -1; >> + } >> + >> iph = (struct iphdr *)(skb_network_header(skb) + offset); >> - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); >> >> tuple->src_v4.s_addr = iph->saddr; >> tuple->dst_v4.s_addr = iph->daddr; >> - tuple->src_port = ports->source; >> - tuple->dst_port = ports->dest; >> + if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) { >> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); >> + tuple->src_port = ports->source; >> + tuple->dst_port = ports->dest; >> + } > > maybe: > > switch (ipproto) { > case IPPROTO_TCP: > case IPPROTO_UDP: > ... > break; > case IPPROTO_GRE: > break; > } > > ? Sure, will fix. >> tuple->l3proto = AF_INET; >> - tuple->l4proto = iph->protocol; >> + tuple->l4proto = ipproto; >> tuple->iifidx = dev->ifindex; >> nf_flow_tuple_encap(skb, tuple); >> >> @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, >> struct flow_ports *ports; >> struct ipv6hdr *ip6h; >> unsigned int thoff; >> + u8 nexthdr; >> >> thoff = sizeof(*ip6h) + offset; >> if (!pskb_may_pull(skb, thoff)) >> @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, >> >> ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); >> >> - switch (ip6h->nexthdr) { >> + nexthdr = ip6h->nexthdr; >> + switch (nexthdr) { >> case IPPROTO_TCP: >> *hdrsize = sizeof(struct tcphdr); >> break; >> case IPPROTO_UDP: >> *hdrsize = sizeof(struct udphdr); >> break; >> +#ifdef CONFIG_NF_CT_PROTO_GRE >> + case IPPROTO_GRE: >> + *hdrsize = sizeof(struct gre_base_hdr); >> + break; >> +#endif >> default: >> return -1; >> } >> @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, >> if (!pskb_may_pull(skb, thoff + *hdrsize)) >> return -1; >> >> + if (nexthdr == IPPROTO_GRE) { >> + struct gre_base_hdr *greh; >> + >> + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); >> + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) >> + return -1; >> + } >> + >> ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); >> - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); >> >> tuple->src_v6 = ip6h->saddr; >> tuple->dst_v6 = ip6h->daddr; >> - tuple->src_port = ports->source; >> - tuple->dst_port = ports->dest; >> + if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) { >> + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); >> + tuple->src_port = ports->source; >> + tuple->dst_port = ports->dest; >> + } >> tuple->l3proto = AF_INET6; >> - tuple->l4proto = ip6h->nexthdr; >> + tuple->l4proto = nexthdr; >> tuple->iifidx = dev->ifindex; >> nf_flow_tuple_encap(skb, tuple); >> >> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c >> index b561e0a..9b81080 100644 >> --- a/net/netfilter/nf_flow_table_offload.c >> +++ b/net/netfilter/nf_flow_table_offload.c >> @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, >> match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); >> break; >> case IPPROTO_UDP: >> + case IPPROTO_GRE: >> break; >> default: >> return -EOPNOTSUPP; >> @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match, >> key->basic.ip_proto = tuple->l4proto; >> mask->basic.ip_proto = 0xff; >> >> - key->tp.src = tuple->src_port; >> - mask->tp.src = 0xffff; >> - key->tp.dst = tuple->dst_port; >> - mask->tp.dst = 0xffff; >> - >> match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | >> BIT(FLOW_DISSECTOR_KEY_CONTROL) | >> - BIT(FLOW_DISSECTOR_KEY_BASIC) | >> - BIT(FLOW_DISSECTOR_KEY_PORTS); >> + BIT(FLOW_DISSECTOR_KEY_BASIC); >> + >> + if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) { >> + key->tp.src = tuple->src_port; >> + mask->tp.src = 0xffff; >> + key->tp.dst = tuple->dst_port; >> + mask->tp.dst = 0xffff; >> + >> + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS); >> + } >> + >> return 0; >> } >> >> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c >> index 0af34ad..731b5d8 100644 >> --- a/net/netfilter/nft_flow_offload.c >> +++ b/net/netfilter/nft_flow_offload.c >> @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, >> break; >> case IPPROTO_UDP: >> break; >> +#ifdef CONFIG_NF_CT_PROTO_GRE >> + case IPPROTO_GRE: { >> + struct nf_conntrack_tuple *tuple; >> + >> + if (ct->status & IPS_NAT_MASK) >> + goto out; > > Why this NAT check? NAT requires more work. I'd like to start with a minimal GRE support. Maybe we can add NAT support later. Toshiaki Makita >> + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; >> + /* No support for GRE v1 */ >> + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) >> + goto out; >> + break; >> + } >> +#endif >> default: >> goto out; >> } >> -- >> 1.8.3.1 >>
On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote: > On 2022/02/08 2:56, Pablo Neira Ayuso wrote: > > On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: [...] > > > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c > > > index 889cf88..48e2f58 100644 > > > --- a/net/netfilter/nf_flow_table_ip.c > > > +++ b/net/netfilter/nf_flow_table_ip.c [...] > > > @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, > > > if (!pskb_may_pull(skb, thoff + *hdrsize)) > > > return -1; > > > + if (ipproto == IPPROTO_GRE) { > > > > No ifdef here? Maybe remove these ifdef everywhere? > > I wanted to avoid adding many ifdefs and I expect this to be compiled out > when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather > may have been unintuitive though. > > Removing all of these ifdefs will cause inconsistent behavior between > CONFIG_NF_CT_PROTO_GRE=n/y. > When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus > it will track GREv1 without key infomation, and the flow will be offloaded. > When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not > be offloaded. > I wanted to just refuse offloading of GRE to avoid this inconsistency. > Anyway this kind of inconsistency seems to happen in software conntrack, so > if you'd like to remove ifdefs, I will do. Good point, thanks for explaining. LGTM. [...] > > > diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c > > > index 0af34ad..731b5d8 100644 > > > --- a/net/netfilter/nft_flow_offload.c > > > +++ b/net/netfilter/nft_flow_offload.c > > > @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, > > > break; > > > case IPPROTO_UDP: > > > break; > > > +#ifdef CONFIG_NF_CT_PROTO_GRE > > > + case IPPROTO_GRE: { > > > + struct nf_conntrack_tuple *tuple; > > > + > > > + if (ct->status & IPS_NAT_MASK) > > > + goto out; > > > > Why this NAT check? > > NAT requires more work. I'd like to start with a minimal GRE support. > Maybe we can add NAT support later. Oh well, yes, no problem.
On 2022/02/09 19:01, Pablo Neira Ayuso wrote: > On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote: >> On 2022/02/08 2:56, Pablo Neira Ayuso wrote: >>> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: > [...] >>>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c >>>> index 889cf88..48e2f58 100644 >>>> --- a/net/netfilter/nf_flow_table_ip.c >>>> +++ b/net/netfilter/nf_flow_table_ip.c > [...] >>>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, >>>> if (!pskb_may_pull(skb, thoff + *hdrsize)) >>>> return -1; >>>> + if (ipproto == IPPROTO_GRE) { >>> >>> No ifdef here? Maybe remove these ifdef everywhere? >> >> I wanted to avoid adding many ifdefs and I expect this to be compiled out >> when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather >> may have been unintuitive though. >> >> Removing all of these ifdefs will cause inconsistent behavior between >> CONFIG_NF_CT_PROTO_GRE=n/y. >> When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus >> it will track GREv1 without key infomation, and the flow will be offloaded. >> When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not >> be offloaded. >> I wanted to just refuse offloading of GRE to avoid this inconsistency. >> Anyway this kind of inconsistency seems to happen in software conntrack, so >> if you'd like to remove ifdefs, I will do. > > Good point, thanks for explaining. LGTM. Let me confirm, did you agree to keep ifdefs, or delete them? Toshiaki Makita
Ping, Pablo? On 2022/02/12 10:54, Toshiaki Makita wrote: > On 2022/02/09 19:01, Pablo Neira Ayuso wrote: >> On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote: >>> On 2022/02/08 2:56, Pablo Neira Ayuso wrote: >>>> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: >> [...] >>>>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c >>>>> index 889cf88..48e2f58 100644 >>>>> --- a/net/netfilter/nf_flow_table_ip.c >>>>> +++ b/net/netfilter/nf_flow_table_ip.c >> [...] >>>>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const >>>>> struct net_device *dev, >>>>> if (!pskb_may_pull(skb, thoff + *hdrsize)) >>>>> return -1; >>>>> + if (ipproto == IPPROTO_GRE) { >>>> >>>> No ifdef here? Maybe remove these ifdef everywhere? >>> >>> I wanted to avoid adding many ifdefs and I expect this to be compiled out >>> when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather >>> may have been unintuitive though. >>> >>> Removing all of these ifdefs will cause inconsistent behavior between >>> CONFIG_NF_CT_PROTO_GRE=n/y. >>> When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus >>> it will track GREv1 without key infomation, and the flow will be offloaded. >>> When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not >>> be offloaded. >>> I wanted to just refuse offloading of GRE to avoid this inconsistency. >>> Anyway this kind of inconsistency seems to happen in software conntrack, so >>> if you'd like to remove ifdefs, I will do. >> >> Good point, thanks for explaining. LGTM. > > Let me confirm, did you agree to keep ifdefs, or delete them? > > Toshiaki Makita
On Sat, Feb 19, 2022 at 09:51:37PM +0900, Toshiaki Makita wrote: > Ping, Pablo? Please go ahead, these #ifdef can go away later on. > On 2022/02/12 10:54, Toshiaki Makita wrote: > > On 2022/02/09 19:01, Pablo Neira Ayuso wrote: > > > On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote: > > > > On 2022/02/08 2:56, Pablo Neira Ayuso wrote: > > > > > On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote: > > > [...] > > > > > > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c > > > > > > index 889cf88..48e2f58 100644 > > > > > > --- a/net/netfilter/nf_flow_table_ip.c > > > > > > +++ b/net/netfilter/nf_flow_table_ip.c > > > [...] > > > > > > @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct > > > > > > sk_buff *skb, const struct net_device *dev, > > > > > > if (!pskb_may_pull(skb, thoff + *hdrsize)) > > > > > > return -1; > > > > > > + if (ipproto == IPPROTO_GRE) { > > > > > > > > > > No ifdef here? Maybe remove these ifdef everywhere? > > > > > > > > I wanted to avoid adding many ifdefs and I expect this to be compiled out > > > > when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather > > > > may have been unintuitive though. > > > > > > > > Removing all of these ifdefs will cause inconsistent behavior between > > > > CONFIG_NF_CT_PROTO_GRE=n/y. > > > > When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus > > > > it will track GREv1 without key infomation, and the flow will be offloaded. > > > > When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not > > > > be offloaded. > > > > I wanted to just refuse offloading of GRE to avoid this inconsistency. > > > > Anyway this kind of inconsistency seems to happen in software conntrack, so > > > > if you'd like to remove ifdefs, I will do. > > > > > > Good point, thanks for explaining. LGTM. > > > > Let me confirm, did you agree to keep ifdefs, or delete them? > > > > Toshiaki Makita
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index b90eca7..e66a375 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -39,8 +39,14 @@ ft->l3proto = ctt->src.l3num; ft->l4proto = ctt->dst.protonum; - ft->src_port = ctt->src.u.tcp.port; - ft->dst_port = ctt->dst.u.tcp.port; + + switch (ctt->dst.protonum) { + case IPPROTO_TCP: + case IPPROTO_UDP: + ft->src_port = ctt->src.u.tcp.port; + ft->dst_port = ctt->dst.u.tcp.port; + break; + } } struct flow_offload *flow_offload_alloc(struct nf_conn *ct) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 889cf88..48e2f58 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; unsigned int thoff; struct iphdr *iph; + u8 ipproto; if (!pskb_may_pull(skb, sizeof(*iph) + offset)) return -1; @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, thoff += offset; - switch (iph->protocol) { + ipproto = iph->protocol; + switch (ipproto) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + if (ipproto == IPPROTO_GRE) { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + } + iph = (struct iphdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; tuple->dst_v4.s_addr = iph->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; + if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) { + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + } tuple->l3proto = AF_INET; - tuple->l4proto = iph->protocol; + tuple->l4proto = ipproto; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, struct flow_ports *ports; struct ipv6hdr *ip6h; unsigned int thoff; + u8 nexthdr; thoff = sizeof(*ip6h) + offset; if (!pskb_may_pull(skb, thoff)) @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - switch (ip6h->nexthdr) { + nexthdr = ip6h->nexthdr; + switch (nexthdr) { case IPPROTO_TCP: *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: *hdrsize = sizeof(struct udphdr); break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + *hdrsize = sizeof(struct gre_base_hdr); + break; +#endif default: return -1; } @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; + if (nexthdr == IPPROTO_GRE) { + struct gre_base_hdr *greh; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff); + if ((greh->flags & GRE_VERSION) != GRE_VERSION_0) + return -1; + } + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset); - ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; tuple->dst_v6 = ip6h->daddr; - tuple->src_port = ports->source; - tuple->dst_port = ports->dest; + if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) { + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + } tuple->l3proto = AF_INET6; - tuple->l4proto = ip6h->nexthdr; + tuple->l4proto = nexthdr; tuple->iifidx = dev->ifindex; nf_flow_tuple_encap(skb, tuple); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index b561e0a..9b81080 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP); break; case IPPROTO_UDP: + case IPPROTO_GRE: break; default: return -EOPNOTSUPP; @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->basic.ip_proto = tuple->l4proto; mask->basic.ip_proto = 0xff; - key->tp.src = tuple->src_port; - mask->tp.src = 0xffff; - key->tp.dst = tuple->dst_port; - mask->tp.dst = 0xffff; - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | BIT(FLOW_DISSECTOR_KEY_CONTROL) | - BIT(FLOW_DISSECTOR_KEY_BASIC) | - BIT(FLOW_DISSECTOR_KEY_PORTS); + BIT(FLOW_DISSECTOR_KEY_BASIC); + + if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) { + key->tp.src = tuple->src_port; + mask->tp.src = 0xffff; + key->tp.dst = tuple->dst_port; + mask->tp.dst = 0xffff; + + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS); + } + return 0; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 0af34ad..731b5d8 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, break; case IPPROTO_UDP: break; +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: { + struct nf_conntrack_tuple *tuple; + + if (ct->status & IPS_NAT_MASK) + goto out; + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + /* No support for GRE v1 */ + if (tuple->src.u.gre.key || tuple->dst.u.gre.key) + goto out; + break; + } +#endif default: goto out; }
Support GREv0 without NAT. Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com> --- net/netfilter/nf_flow_table_core.c | 10 +++++-- net/netfilter/nf_flow_table_ip.c | 54 ++++++++++++++++++++++++++++------- net/netfilter/nf_flow_table_offload.c | 19 +++++++----- net/netfilter/nft_flow_offload.c | 13 +++++++++ 4 files changed, 77 insertions(+), 19 deletions(-)