diff mbox series

[net-next,1/3] netfilter: flowtable: Support GRE

Message ID 20220203115941.3107572-2-toshiaki.makita1@gmail.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Conntrack GRE offload | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 38 this patch: 38
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 38 this patch: 38
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 180 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Toshiaki Makita Feb. 3, 2022, 11:59 a.m. UTC
Support GREv0 without NAT.

Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
---
 net/netfilter/nf_flow_table_core.c    | 10 +++++--
 net/netfilter/nf_flow_table_ip.c      | 54 ++++++++++++++++++++++++++++-------
 net/netfilter/nf_flow_table_offload.c | 19 +++++++-----
 net/netfilter/nft_flow_offload.c      | 13 +++++++++
 4 files changed, 77 insertions(+), 19 deletions(-)

Comments

Pablo Neira Ayuso Feb. 7, 2022, 5:56 p.m. UTC | #1
On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
> Support GREv0 without NAT.
> 
> Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
> ---
>  net/netfilter/nf_flow_table_core.c    | 10 +++++--
>  net/netfilter/nf_flow_table_ip.c      | 54 ++++++++++++++++++++++++++++-------
>  net/netfilter/nf_flow_table_offload.c | 19 +++++++-----
>  net/netfilter/nft_flow_offload.c      | 13 +++++++++
>  4 files changed, 77 insertions(+), 19 deletions(-)
> 
> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
> index b90eca7..e66a375 100644
> --- a/net/netfilter/nf_flow_table_core.c
> +++ b/net/netfilter/nf_flow_table_core.c
> @@ -39,8 +39,14 @@
>  
>  	ft->l3proto = ctt->src.l3num;
>  	ft->l4proto = ctt->dst.protonum;
> -	ft->src_port = ctt->src.u.tcp.port;
> -	ft->dst_port = ctt->dst.u.tcp.port;
> +
> +	switch (ctt->dst.protonum) {
> +	case IPPROTO_TCP:
> +	case IPPROTO_UDP:
> +		ft->src_port = ctt->src.u.tcp.port;
> +		ft->dst_port = ctt->dst.u.tcp.port;
> +		break;
> +	}
>  }
>  
>  struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> index 889cf88..48e2f58 100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
> @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>  	struct flow_ports *ports;
>  	unsigned int thoff;
>  	struct iphdr *iph;
> +	u8 ipproto;
>
>  	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
>  		return -1;
> @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>  
>  	thoff += offset;
>  
> -	switch (iph->protocol) {
> +	ipproto = iph->protocol;
> +	switch (ipproto) {
>  	case IPPROTO_TCP:
>  		*hdrsize = sizeof(struct tcphdr);
>  		break;
>  	case IPPROTO_UDP:
>  		*hdrsize = sizeof(struct udphdr);
>  		break;
> +#ifdef CONFIG_NF_CT_PROTO_GRE
> +	case IPPROTO_GRE:
> +		*hdrsize = sizeof(struct gre_base_hdr);
> +		break;
> +#endif
>  	default:
>  		return -1;
>  	}
> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>  	if (!pskb_may_pull(skb, thoff + *hdrsize))
>  		return -1;
>  
> +	if (ipproto == IPPROTO_GRE) {

No ifdef here? Maybe remove these ifdef everywhere?

> +		struct gre_base_hdr *greh;
> +
> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
> +			return -1;
> +	}
> +
>  	iph = (struct iphdr *)(skb_network_header(skb) + offset);
> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>  
>  	tuple->src_v4.s_addr	= iph->saddr;
>  	tuple->dst_v4.s_addr	= iph->daddr;
> -	tuple->src_port		= ports->source;
> -	tuple->dst_port		= ports->dest;
> +	if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) {
> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +		tuple->src_port		= ports->source;
> +		tuple->dst_port		= ports->dest;
> +	}

maybe:

        switch (ipproto) {
        case IPPROTO_TCP:
        case IPPROTO_UDP:
                ...
                break;
        case IPPROTO_GRE:
                break;
        }

?

>  	tuple->l3proto		= AF_INET;
> -	tuple->l4proto		= iph->protocol;
> +	tuple->l4proto		= ipproto;
>  	tuple->iifidx		= dev->ifindex;
>  	nf_flow_tuple_encap(skb, tuple);
>  
> @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>  	struct flow_ports *ports;
>  	struct ipv6hdr *ip6h;
>  	unsigned int thoff;
> +	u8 nexthdr;
>  
>  	thoff = sizeof(*ip6h) + offset;
>  	if (!pskb_may_pull(skb, thoff))
> @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>  
>  	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>  
> -	switch (ip6h->nexthdr) {
> +	nexthdr = ip6h->nexthdr;
> +	switch (nexthdr) {
>  	case IPPROTO_TCP:
>  		*hdrsize = sizeof(struct tcphdr);
>  		break;
>  	case IPPROTO_UDP:
>  		*hdrsize = sizeof(struct udphdr);
>  		break;
> +#ifdef CONFIG_NF_CT_PROTO_GRE
> +	case IPPROTO_GRE:
> +		*hdrsize = sizeof(struct gre_base_hdr);
> +		break;
> +#endif
>  	default:
>  		return -1;
>  	}
> @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>  	if (!pskb_may_pull(skb, thoff + *hdrsize))
>  		return -1;
>  
> +	if (nexthdr == IPPROTO_GRE) {
> +		struct gre_base_hdr *greh;
> +
> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
> +			return -1;
> +	}
> +
>  	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>  
>  	tuple->src_v6		= ip6h->saddr;
>  	tuple->dst_v6		= ip6h->daddr;
> -	tuple->src_port		= ports->source;
> -	tuple->dst_port		= ports->dest;
> +	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
> +		tuple->src_port		= ports->source;
> +		tuple->dst_port		= ports->dest;
> +	}
>  	tuple->l3proto		= AF_INET6;
> -	tuple->l4proto		= ip6h->nexthdr;
> +	tuple->l4proto		= nexthdr;
>  	tuple->iifidx		= dev->ifindex;
>  	nf_flow_tuple_encap(skb, tuple);
>  
> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
> index b561e0a..9b81080 100644
> --- a/net/netfilter/nf_flow_table_offload.c
> +++ b/net/netfilter/nf_flow_table_offload.c
> @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>  		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
>  		break;
>  	case IPPROTO_UDP:
> +	case IPPROTO_GRE:
>  		break;
>  	default:
>  		return -EOPNOTSUPP;
> @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>  	key->basic.ip_proto = tuple->l4proto;
>  	mask->basic.ip_proto = 0xff;
>  
> -	key->tp.src = tuple->src_port;
> -	mask->tp.src = 0xffff;
> -	key->tp.dst = tuple->dst_port;
> -	mask->tp.dst = 0xffff;
> -
>  	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
>  				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
> -				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
> -				      BIT(FLOW_DISSECTOR_KEY_PORTS);
> +				      BIT(FLOW_DISSECTOR_KEY_BASIC);
> +
> +	if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) {
> +		key->tp.src = tuple->src_port;
> +		mask->tp.src = 0xffff;
> +		key->tp.dst = tuple->dst_port;
> +		mask->tp.dst = 0xffff;
> +
> +		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
> index 0af34ad..731b5d8 100644
> --- a/net/netfilter/nft_flow_offload.c
> +++ b/net/netfilter/nft_flow_offload.c
> @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  		break;
>  	case IPPROTO_UDP:
>  		break;
> +#ifdef CONFIG_NF_CT_PROTO_GRE
> +	case IPPROTO_GRE: {
> +		struct nf_conntrack_tuple *tuple;
> +
> +		if (ct->status & IPS_NAT_MASK)
> +			goto out;

Why this NAT check?

> +		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
> +		/* No support for GRE v1 */
> +		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
> +			goto out;
> +		break;
> +	}
> +#endif
>  	default:
>  		goto out;
>  	}
> -- 
> 1.8.3.1
>
Toshiaki Makita Feb. 8, 2022, 2:30 p.m. UTC | #2
On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
>> Support GREv0 without NAT.
>>
>> Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
>> ---
>>   net/netfilter/nf_flow_table_core.c    | 10 +++++--
>>   net/netfilter/nf_flow_table_ip.c      | 54 ++++++++++++++++++++++++++++-------
>>   net/netfilter/nf_flow_table_offload.c | 19 +++++++-----
>>   net/netfilter/nft_flow_offload.c      | 13 +++++++++
>>   4 files changed, 77 insertions(+), 19 deletions(-)
>>
>> diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
>> index b90eca7..e66a375 100644
>> --- a/net/netfilter/nf_flow_table_core.c
>> +++ b/net/netfilter/nf_flow_table_core.c
>> @@ -39,8 +39,14 @@
>>   
>>   	ft->l3proto = ctt->src.l3num;
>>   	ft->l4proto = ctt->dst.protonum;
>> -	ft->src_port = ctt->src.u.tcp.port;
>> -	ft->dst_port = ctt->dst.u.tcp.port;
>> +
>> +	switch (ctt->dst.protonum) {
>> +	case IPPROTO_TCP:
>> +	case IPPROTO_UDP:
>> +		ft->src_port = ctt->src.u.tcp.port;
>> +		ft->dst_port = ctt->dst.u.tcp.port;
>> +		break;
>> +	}
>>   }
>>   
>>   struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>> index 889cf88..48e2f58 100644
>> --- a/net/netfilter/nf_flow_table_ip.c
>> +++ b/net/netfilter/nf_flow_table_ip.c
>> @@ -172,6 +172,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	unsigned int thoff;
>>   	struct iphdr *iph;
>> +	u8 ipproto;
>>
>>   	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
>>   		return -1;
>> @@ -185,13 +186,19 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	thoff += offset;
>>   
>> -	switch (iph->protocol) {
>> +	ipproto = iph->protocol;
>> +	switch (ipproto) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (ipproto == IPPROTO_GRE) {
> 
> No ifdef here? Maybe remove these ifdef everywhere?

I wanted to avoid adding many ifdefs and I expect this to be compiled out when 
CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather may have 
been unintuitive though.

Removing all of these ifdefs will cause inconsistent behavior between 
CONFIG_NF_CT_PROTO_GRE=n/y.
When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus it will 
track GREv1 without key infomation, and the flow will be offloaded.
When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not be 
offloaded.
I wanted to just refuse offloading of GRE to avoid this inconsistency.
Anyway this kind of inconsistency seems to happen in software conntrack, so if you'd 
like to remove ifdefs, I will do.

>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	iph = (struct iphdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v4.s_addr	= iph->saddr;
>>   	tuple->dst_v4.s_addr	= iph->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
> 
> maybe:
> 
>          switch (ipproto) {
>          case IPPROTO_TCP:
>          case IPPROTO_UDP:
>                  ...
>                  break;
>          case IPPROTO_GRE:
>                  break;
>          }
> 
> ?

Sure, will fix.

>>   	tuple->l3proto		= AF_INET;
>> -	tuple->l4proto		= iph->protocol;
>> +	tuple->l4proto		= ipproto;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> @@ -521,6 +538,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	struct flow_ports *ports;
>>   	struct ipv6hdr *ip6h;
>>   	unsigned int thoff;
>> +	u8 nexthdr;
>>   
>>   	thoff = sizeof(*ip6h) + offset;
>>   	if (!pskb_may_pull(skb, thoff))
>> @@ -528,13 +546,19 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>>   
>> -	switch (ip6h->nexthdr) {
>> +	nexthdr = ip6h->nexthdr;
>> +	switch (nexthdr) {
>>   	case IPPROTO_TCP:
>>   		*hdrsize = sizeof(struct tcphdr);
>>   		break;
>>   	case IPPROTO_UDP:
>>   		*hdrsize = sizeof(struct udphdr);
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE:
>> +		*hdrsize = sizeof(struct gre_base_hdr);
>> +		break;
>> +#endif
>>   	default:
>>   		return -1;
>>   	}
>> @@ -545,15 +569,25 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
>>   	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>   		return -1;
>>   
>> +	if (nexthdr == IPPROTO_GRE) {
>> +		struct gre_base_hdr *greh;
>> +
>> +		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
>> +		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
>> +			return -1;
>> +	}
>> +
>>   	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
>> -	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>>   
>>   	tuple->src_v6		= ip6h->saddr;
>>   	tuple->dst_v6		= ip6h->daddr;
>> -	tuple->src_port		= ports->source;
>> -	tuple->dst_port		= ports->dest;
>> +	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
>> +		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
>> +		tuple->src_port		= ports->source;
>> +		tuple->dst_port		= ports->dest;
>> +	}
>>   	tuple->l3proto		= AF_INET6;
>> -	tuple->l4proto		= ip6h->nexthdr;
>> +	tuple->l4proto		= nexthdr;
>>   	tuple->iifidx		= dev->ifindex;
>>   	nf_flow_tuple_encap(skb, tuple);
>>   
>> diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
>> index b561e0a..9b81080 100644
>> --- a/net/netfilter/nf_flow_table_offload.c
>> +++ b/net/netfilter/nf_flow_table_offload.c
>> @@ -170,6 +170,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
>>   		break;
>>   	case IPPROTO_UDP:
>> +	case IPPROTO_GRE:
>>   		break;
>>   	default:
>>   		return -EOPNOTSUPP;
>> @@ -178,15 +179,19 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
>>   	key->basic.ip_proto = tuple->l4proto;
>>   	mask->basic.ip_proto = 0xff;
>>   
>> -	key->tp.src = tuple->src_port;
>> -	mask->tp.src = 0xffff;
>> -	key->tp.dst = tuple->dst_port;
>> -	mask->tp.dst = 0xffff;
>> -
>>   	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
>>   				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
>> -				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
>> -				      BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +				      BIT(FLOW_DISSECTOR_KEY_BASIC);
>> +
>> +	if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) {
>> +		key->tp.src = tuple->src_port;
>> +		mask->tp.src = 0xffff;
>> +		key->tp.dst = tuple->dst_port;
>> +		mask->tp.dst = 0xffff;
>> +
>> +		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
>> +	}
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
>> index 0af34ad..731b5d8 100644
>> --- a/net/netfilter/nft_flow_offload.c
>> +++ b/net/netfilter/nft_flow_offload.c
>> @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>>   		break;
>>   	case IPPROTO_UDP:
>>   		break;
>> +#ifdef CONFIG_NF_CT_PROTO_GRE
>> +	case IPPROTO_GRE: {
>> +		struct nf_conntrack_tuple *tuple;
>> +
>> +		if (ct->status & IPS_NAT_MASK)
>> +			goto out;
> 
> Why this NAT check?

NAT requires more work. I'd like to start with a minimal GRE support.
Maybe we can add NAT support later.

Toshiaki Makita

>> +		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
>> +		/* No support for GRE v1 */
>> +		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
>> +			goto out;
>> +		break;
>> +	}
>> +#endif
>>   	default:
>>   		goto out;
>>   	}
>> -- 
>> 1.8.3.1
>>
Pablo Neira Ayuso Feb. 9, 2022, 10:01 a.m. UTC | #3
On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote:
> On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
> > On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
[...]
> > > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> > > index 889cf88..48e2f58 100644
> > > --- a/net/netfilter/nf_flow_table_ip.c
> > > +++ b/net/netfilter/nf_flow_table_ip.c
[...]
> > > @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
> > >   	if (!pskb_may_pull(skb, thoff + *hdrsize))
> > >   		return -1;
> > > +	if (ipproto == IPPROTO_GRE) {
> > 
> > No ifdef here? Maybe remove these ifdef everywhere?
> 
> I wanted to avoid adding many ifdefs and I expect this to be compiled out
> when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather
> may have been unintuitive though.
> 
> Removing all of these ifdefs will cause inconsistent behavior between
> CONFIG_NF_CT_PROTO_GRE=n/y.
> When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus
> it will track GREv1 without key infomation, and the flow will be offloaded.
> When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not
> be offloaded.
> I wanted to just refuse offloading of GRE to avoid this inconsistency.
> Anyway this kind of inconsistency seems to happen in software conntrack, so
> if you'd like to remove ifdefs, I will do.

Good point, thanks for explaining. LGTM.

[...]
> > > diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
> > > index 0af34ad..731b5d8 100644
> > > --- a/net/netfilter/nft_flow_offload.c
> > > +++ b/net/netfilter/nft_flow_offload.c
> > > @@ -298,6 +298,19 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
> > >   		break;
> > >   	case IPPROTO_UDP:
> > >   		break;
> > > +#ifdef CONFIG_NF_CT_PROTO_GRE
> > > +	case IPPROTO_GRE: {
> > > +		struct nf_conntrack_tuple *tuple;
> > > +
> > > +		if (ct->status & IPS_NAT_MASK)
> > > +			goto out;
> > 
> > Why this NAT check?
> 
> NAT requires more work. I'd like to start with a minimal GRE support.
> Maybe we can add NAT support later.

Oh well, yes, no problem.
Toshiaki Makita Feb. 12, 2022, 1:54 a.m. UTC | #4
On 2022/02/09 19:01, Pablo Neira Ayuso wrote:
> On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote:
>> On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
>>> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
> [...]
>>>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>>>> index 889cf88..48e2f58 100644
>>>> --- a/net/netfilter/nf_flow_table_ip.c
>>>> +++ b/net/netfilter/nf_flow_table_ip.c
> [...]
>>>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
>>>>    	if (!pskb_may_pull(skb, thoff + *hdrsize))
>>>>    		return -1;
>>>> +	if (ipproto == IPPROTO_GRE) {
>>>
>>> No ifdef here? Maybe remove these ifdef everywhere?
>>
>> I wanted to avoid adding many ifdefs and I expect this to be compiled out
>> when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather
>> may have been unintuitive though.
>>
>> Removing all of these ifdefs will cause inconsistent behavior between
>> CONFIG_NF_CT_PROTO_GRE=n/y.
>> When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus
>> it will track GREv1 without key infomation, and the flow will be offloaded.
>> When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not
>> be offloaded.
>> I wanted to just refuse offloading of GRE to avoid this inconsistency.
>> Anyway this kind of inconsistency seems to happen in software conntrack, so
>> if you'd like to remove ifdefs, I will do.
> 
> Good point, thanks for explaining. LGTM.

Let me confirm, did you agree to keep ifdefs, or delete them?

Toshiaki Makita
Toshiaki Makita Feb. 19, 2022, 12:51 p.m. UTC | #5
Ping, Pablo?

On 2022/02/12 10:54, Toshiaki Makita wrote:
> On 2022/02/09 19:01, Pablo Neira Ayuso wrote:
>> On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote:
>>> On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
>>>> On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
>> [...]
>>>>> diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
>>>>> index 889cf88..48e2f58 100644
>>>>> --- a/net/netfilter/nf_flow_table_ip.c
>>>>> +++ b/net/netfilter/nf_flow_table_ip.c
>> [...]
>>>>> @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const 
>>>>> struct net_device *dev,
>>>>>        if (!pskb_may_pull(skb, thoff + *hdrsize))
>>>>>            return -1;
>>>>> +    if (ipproto == IPPROTO_GRE) {
>>>>
>>>> No ifdef here? Maybe remove these ifdef everywhere?
>>>
>>> I wanted to avoid adding many ifdefs and I expect this to be compiled out
>>> when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather
>>> may have been unintuitive though.
>>>
>>> Removing all of these ifdefs will cause inconsistent behavior between
>>> CONFIG_NF_CT_PROTO_GRE=n/y.
>>> When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus
>>> it will track GREv1 without key infomation, and the flow will be offloaded.
>>> When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not
>>> be offloaded.
>>> I wanted to just refuse offloading of GRE to avoid this inconsistency.
>>> Anyway this kind of inconsistency seems to happen in software conntrack, so
>>> if you'd like to remove ifdefs, I will do.
>>
>> Good point, thanks for explaining. LGTM.
> 
> Let me confirm, did you agree to keep ifdefs, or delete them?
> 
> Toshiaki Makita
Pablo Neira Ayuso Feb. 23, 2022, 10:44 a.m. UTC | #6
On Sat, Feb 19, 2022 at 09:51:37PM +0900, Toshiaki Makita wrote:
> Ping, Pablo?

Please go ahead, these #ifdef can go away later on.

> On 2022/02/12 10:54, Toshiaki Makita wrote:
> > On 2022/02/09 19:01, Pablo Neira Ayuso wrote:
> > > On Tue, Feb 08, 2022 at 11:30:03PM +0900, Toshiaki Makita wrote:
> > > > On 2022/02/08 2:56, Pablo Neira Ayuso wrote:
> > > > > On Thu, Feb 03, 2022 at 08:59:39PM +0900, Toshiaki Makita wrote:
> > > [...]
> > > > > > diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
> > > > > > index 889cf88..48e2f58 100644
> > > > > > --- a/net/netfilter/nf_flow_table_ip.c
> > > > > > +++ b/net/netfilter/nf_flow_table_ip.c
> > > [...]
> > > > > > @@ -202,15 +209,25 @@ static int nf_flow_tuple_ip(struct
> > > > > > sk_buff *skb, const struct net_device *dev,
> > > > > >        if (!pskb_may_pull(skb, thoff + *hdrsize))
> > > > > >            return -1;
> > > > > > +    if (ipproto == IPPROTO_GRE) {
> > > > > 
> > > > > No ifdef here? Maybe remove these ifdef everywhere?
> > > > 
> > > > I wanted to avoid adding many ifdefs and I expect this to be compiled out
> > > > when CONFIG_NF_CT_PROTO_GRE=n as this block is unreachable anyway. It rather
> > > > may have been unintuitive though.
> > > > 
> > > > Removing all of these ifdefs will cause inconsistent behavior between
> > > > CONFIG_NF_CT_PROTO_GRE=n/y.
> > > > When CONFIG_NF_CT_PROTO_GRE=n, conntrack cannot determine GRE version, thus
> > > > it will track GREv1 without key infomation, and the flow will be offloaded.
> > > > When CONFIG_NF_CT_PROTO_GRE=y, GREv1 will have key information and will not
> > > > be offloaded.
> > > > I wanted to just refuse offloading of GRE to avoid this inconsistency.
> > > > Anyway this kind of inconsistency seems to happen in software conntrack, so
> > > > if you'd like to remove ifdefs, I will do.
> > > 
> > > Good point, thanks for explaining. LGTM.
> > 
> > Let me confirm, did you agree to keep ifdefs, or delete them?
> > 
> > Toshiaki Makita
diff mbox series

Patch

diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index b90eca7..e66a375 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -39,8 +39,14 @@ 
 
 	ft->l3proto = ctt->src.l3num;
 	ft->l4proto = ctt->dst.protonum;
-	ft->src_port = ctt->src.u.tcp.port;
-	ft->dst_port = ctt->dst.u.tcp.port;
+
+	switch (ctt->dst.protonum) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		ft->src_port = ctt->src.u.tcp.port;
+		ft->dst_port = ctt->dst.u.tcp.port;
+		break;
+	}
 }
 
 struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 889cf88..48e2f58 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -172,6 +172,7 @@  static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	struct flow_ports *ports;
 	unsigned int thoff;
 	struct iphdr *iph;
+	u8 ipproto;
 
 	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
 		return -1;
@@ -185,13 +186,19 @@  static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 
 	thoff += offset;
 
-	switch (iph->protocol) {
+	ipproto = iph->protocol;
+	switch (ipproto) {
 	case IPPROTO_TCP:
 		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
 		*hdrsize = sizeof(struct udphdr);
 		break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		*hdrsize = sizeof(struct gre_base_hdr);
+		break;
+#endif
 	default:
 		return -1;
 	}
@@ -202,15 +209,25 @@  static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
 	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
+	if (ipproto == IPPROTO_GRE) {
+		struct gre_base_hdr *greh;
+
+		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+			return -1;
+	}
+
 	iph = (struct iphdr *)(skb_network_header(skb) + offset);
-	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v4.s_addr	= iph->saddr;
 	tuple->dst_v4.s_addr	= iph->daddr;
-	tuple->src_port		= ports->source;
-	tuple->dst_port		= ports->dest;
+	if (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP) {
+		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+		tuple->src_port		= ports->source;
+		tuple->dst_port		= ports->dest;
+	}
 	tuple->l3proto		= AF_INET;
-	tuple->l4proto		= iph->protocol;
+	tuple->l4proto		= ipproto;
 	tuple->iifidx		= dev->ifindex;
 	nf_flow_tuple_encap(skb, tuple);
 
@@ -521,6 +538,7 @@  static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	struct flow_ports *ports;
 	struct ipv6hdr *ip6h;
 	unsigned int thoff;
+	u8 nexthdr;
 
 	thoff = sizeof(*ip6h) + offset;
 	if (!pskb_may_pull(skb, thoff))
@@ -528,13 +546,19 @@  static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 
 	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
 
-	switch (ip6h->nexthdr) {
+	nexthdr = ip6h->nexthdr;
+	switch (nexthdr) {
 	case IPPROTO_TCP:
 		*hdrsize = sizeof(struct tcphdr);
 		break;
 	case IPPROTO_UDP:
 		*hdrsize = sizeof(struct udphdr);
 		break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		*hdrsize = sizeof(struct gre_base_hdr);
+		break;
+#endif
 	default:
 		return -1;
 	}
@@ -545,15 +569,25 @@  static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
 	if (!pskb_may_pull(skb, thoff + *hdrsize))
 		return -1;
 
+	if (nexthdr == IPPROTO_GRE) {
+		struct gre_base_hdr *greh;
+
+		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
+		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
+			return -1;
+	}
+
 	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
-	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
 
 	tuple->src_v6		= ip6h->saddr;
 	tuple->dst_v6		= ip6h->daddr;
-	tuple->src_port		= ports->source;
-	tuple->dst_port		= ports->dest;
+	if (nexthdr == IPPROTO_TCP || nexthdr == IPPROTO_UDP) {
+		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+		tuple->src_port		= ports->source;
+		tuple->dst_port		= ports->dest;
+	}
 	tuple->l3proto		= AF_INET6;
-	tuple->l4proto		= ip6h->nexthdr;
+	tuple->l4proto		= nexthdr;
 	tuple->iifidx		= dev->ifindex;
 	nf_flow_tuple_encap(skb, tuple);
 
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index b561e0a..9b81080 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -170,6 +170,7 @@  static int nf_flow_rule_match(struct nf_flow_match *match,
 		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
 		break;
 	case IPPROTO_UDP:
+	case IPPROTO_GRE:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -178,15 +179,19 @@  static int nf_flow_rule_match(struct nf_flow_match *match,
 	key->basic.ip_proto = tuple->l4proto;
 	mask->basic.ip_proto = 0xff;
 
-	key->tp.src = tuple->src_port;
-	mask->tp.src = 0xffff;
-	key->tp.dst = tuple->dst_port;
-	mask->tp.dst = 0xffff;
-
 	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
 				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
-				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
-				      BIT(FLOW_DISSECTOR_KEY_PORTS);
+				      BIT(FLOW_DISSECTOR_KEY_BASIC);
+
+	if (tuple->l4proto == IPPROTO_TCP || tuple->l4proto == IPPROTO_UDP) {
+		key->tp.src = tuple->src_port;
+		mask->tp.src = 0xffff;
+		key->tp.dst = tuple->dst_port;
+		mask->tp.dst = 0xffff;
+
+		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_PORTS);
+	}
+
 	return 0;
 }
 
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 0af34ad..731b5d8 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -298,6 +298,19 @@  static void nft_flow_offload_eval(const struct nft_expr *expr,
 		break;
 	case IPPROTO_UDP:
 		break;
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE: {
+		struct nf_conntrack_tuple *tuple;
+
+		if (ct->status & IPS_NAT_MASK)
+			goto out;
+		tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+		/* No support for GRE v1 */
+		if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
+			goto out;
+		break;
+	}
+#endif
 	default:
 		goto out;
 	}