diff mbox series

[net-next] openvswitch: prepare for stolen verdict coming from conntrack and nat engine

Message ID 20240703104640.20878-1-fw@strlen.de (mailing list archive)
State Accepted
Commit c7f79f2620b7776586c626edf21eb6ed6ed3d1eb
Delegated to: Netdev Maintainers
Headers show
Series [net-next] openvswitch: prepare for stolen verdict coming from conntrack and nat engine | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 839 this patch: 839
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: edumazet@google.com kuba@kernel.org pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 846 this patch: 846
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 846 this patch: 846
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 90 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-05--06-00 (tests: 695)

Commit Message

Florian Westphal July 3, 2024, 10:46 a.m. UTC
At this time, conntrack either returns NF_ACCEPT or NF_DROP.
To improve debugging it would be nice to be able to replace the NF_DROP
verdict with the NF_DROP_REASON() helper.

This helper releases the skb instantly (so drop_monitor can pinpoint
precise location) and returns NF_STOLEN.

Prepare call sites to deal with this before introducing such changes
in conntrack and nat core.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/openvswitch/conntrack.c | 47 +++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 10 deletions(-)

Comments

Aaron Conole July 3, 2024, 2:59 p.m. UTC | #1
Hi Florian,

Florian Westphal <fw@strlen.de> writes:

> At this time, conntrack either returns NF_ACCEPT or NF_DROP.
> To improve debugging it would be nice to be able to replace the NF_DROP
> verdict with the NF_DROP_REASON() helper.
>
> This helper releases the skb instantly (so drop_monitor can pinpoint
> precise location) and returns NF_STOLEN.
>
> Prepare call sites to deal with this before introducing such changes
> in conntrack and nat core.
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---

AFAIU, these changes are only impacting the existing NF_DROP cases, and
won't impact how ovs + netfilter communicate about invalid packets.  One
important thing to note is that we rely on:

 * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
 * set to NULL and 0 will be returned.

Based on this, my understanding is if packet isn't part of a valid
connection, skb->_nfct is NULL and NF_ACCEPT is returned.

If this changes, those flow pipelines matching on ct_state(+inv+trk)
will no longer function as expected since we will bail early.  I think
this comment will also apply to the act_ct change as well.

>  net/openvswitch/conntrack.c | 47 +++++++++++++++++++++++++++++--------
>  1 file changed, 37 insertions(+), 10 deletions(-)
>
> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> index 3b980bf2770b..8eb1d644b741 100644
> --- a/net/openvswitch/conntrack.c
> +++ b/net/openvswitch/conntrack.c
> @@ -679,6 +679,8 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
>  		action |= BIT(NF_NAT_MANIP_DST);
>  
>  	err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
> +	if (err != NF_ACCEPT)
> +		return err;
>  
>  	if (action & BIT(NF_NAT_MANIP_SRC))
>  		ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
> @@ -697,6 +699,22 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
>  }
>  #endif
>  
> +static int verdict_to_errno(unsigned int verdict)
> +{
> +	switch (verdict & NF_VERDICT_MASK) {
> +	case NF_ACCEPT:
> +		return 0;
> +	case NF_DROP:
> +		return -EINVAL;
> +	case NF_STOLEN:
> +		return -EINPROGRESS;
> +	default:
> +		break;
> +	}
> +
> +	return -EINVAL;
> +}
> +
>  /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
>   * not done already.  Update key with new CT state after passing the packet
>   * through conntrack.
> @@ -735,7 +753,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>  
>  		err = nf_conntrack_in(skb, &state);
>  		if (err != NF_ACCEPT)
> -			return -ENOENT;
> +			return verdict_to_errno(err);
>  
>  		/* Clear CT state NAT flags to mark that we have not yet done
>  		 * NAT after the nf_conntrack_in() call.  We can actually clear
> @@ -762,9 +780,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>  		 * the key->ct_state.
>  		 */
>  		if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
> -		    (nf_ct_is_confirmed(ct) || info->commit) &&
> -		    ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
> -			return -EINVAL;
> +		    (nf_ct_is_confirmed(ct) || info->commit)) {
> +			int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
> +
> +			err = verdict_to_errno(err);
> +			if (err)
> +				return err;
>  		}
>  
>  		/* Userspace may decide to perform a ct lookup without a helper
> @@ -795,9 +816,12 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
>  		 * - When committing an unconfirmed connection.
>  		 */
>  		if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
> -					      info->commit) &&
> -		    nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
> -			return -EINVAL;
> +					      info->commit)) {
> +			int err = nf_ct_helper(skb, ct, ctinfo, info->family);
> +
> +			err = verdict_to_errno(err);
> +			if (err)
> +				return err;
>  		}
>  
>  		if (nf_ct_protonum(ct) == IPPROTO_TCP &&
> @@ -1001,10 +1025,9 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
>  	/* This will take care of sending queued events even if the connection
>  	 * is already confirmed.
>  	 */
> -	if (nf_conntrack_confirm(skb) != NF_ACCEPT)
> -		return -EINVAL;
> +	err = nf_conntrack_confirm(skb);
>  
> -	return 0;
> +	return verdict_to_errno(err);
>  }
>  
>  /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
> @@ -1039,6 +1062,10 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
>  	else
>  		err = ovs_ct_lookup(net, key, info, skb);
>  
> +	/* conntrack core returned NF_STOLEN */
> +	if (err == -EINPROGRESS)
> +		return err;
> +
>  	skb_push_rcsum(skb, nh_ofs);
>  	if (err)
>  		ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);
Florian Westphal July 3, 2024, 3:19 p.m. UTC | #2
Aaron Conole <aconole@redhat.com> wrote:
> > verdict with NF_DROP_REASON() helper,
> >
> > This helper releases the skb instantly (so drop_monitor can pinpoint
> > precise location) and returns NF_STOLEN.
> >
> > Prepare call sites to deal with this before introducing such changes
> > in conntrack and nat core.
> >
> > Signed-off-by: Florian Westphal <fw@strlen.de>
> > ---
> 
> AFAIU, these changes are only impacting the existing NF_DROP cases, and
> won't impact how ovs + netfilter communicate about invalid packets.  One
> important thing to note is that we rely on:
> 
>  * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
>  * set to NULL and 0 will be returned.

Right, this is about how to communicate 'packet dropped'.

NF_DROP means 'please call kfree_skb for me'.  The problem from an
introspection point of view is that drop monitor will blame nf_hook_slow()
(for netfilter), or ovs and act_ct respectively, for the drop.

The plan is to allow the conntrack/nat engine to return a STOLEN verdict
("skb might have been freed already").

Example change:
@@ -52,10 +53,8 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
        rt = skb_rtable(skb);
        nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
        newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
-       if (!newsrc) {
-               pr_info("%s ate my IP address\n", out->name);
-               return NF_DROP;
-       }
+       if (!newsrc)
+               return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, EADDRNOTAVAIL);


Where NF_DROP_REASON() is:

static __always_inline int
NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err)
{
        BUILD_BUG_ON(err > 0xffff);

        kfree_skb_reason(skb, reason);

        return ((err << 16) | NF_STOLEN);
}

So drop monitoring tools will blame nf_nat_masquerade.c:nf_nat_masquerade_ipv4 and not
the consumer of the NF_DROP verdict.

I can't make such changes ATM because ovs and act_ct assume conntrack
returns only ACCEPT and DROP, so we'd get double-free.  Hope that makes
sense.

Thanks!
Aaron Conole July 3, 2024, 11:21 p.m. UTC | #3
Florian Westphal <fw@strlen.de> writes:

> Aaron Conole <aconole@redhat.com> wrote:
>> > verdict with NF_DROP_REASON() helper,
>> >
>> > This helper releases the skb instantly (so drop_monitor can pinpoint
>> > precise location) and returns NF_STOLEN.
>> >
>> > Prepare call sites to deal with this before introducing such changes
>> > in conntrack and nat core.
>> >
>> > Signed-off-by: Florian Westphal <fw@strlen.de>
>> > ---
>> 
>> AFAIU, these changes are only impacting the existing NF_DROP cases, and
>> won't impact how ovs + netfilter communicate about invalid packets.  One
>> important thing to note is that we rely on:
>> 
>>  * Note that if the packet is deemed invalid by conntrack, skb->_nfct will be
>>  * set to NULL and 0 will be returned.
>
> Right, this is about how to communicate 'packet dropped'.
>
> NF_DROP means 'please call kfree_skb for me'.  Problem from introspection point
> of view is that drop monitor will blame nf_hook_slow() (for netfilter)
> and ovs resp. act_ct for the drop.
>
> Plan is to allow conntrack/nat engine to return STOLEN verdict ("skb
> might have been free'd already").
>
> Example change:
> @@ -52,10 +53,8 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
>         rt = skb_rtable(skb);
>         nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
>         newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
> -       if (!newsrc) {
> -               pr_info("%s ate my IP address\n", out->name);
> -               return NF_DROP;
> -       }
> +       if (!newsrc)
> +               return NF_DROP_REASON(skb, SKB_DROP_REASON_NETFILTER_DROP, EADDRNOTAVAIL);
>
>
> Where NF_DROP_REASON() is:
>
> static __always_inline int
> NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err)
> {
>         BUILD_BUG_ON(err > 0xffff);
>
>         kfree_skb_reason(skb, reason);
>
>         return ((err << 16) | NF_STOLEN);
> }
>
> So drop monitoring tools will blame
> nf_nat_masquerade.c:nf_nat_masquerade_ipv4 and not
> the consumer of the NF_DROP verdict.
>
> I can't make such changes ATM because ovs and act_ct assume conntrack
> returns only ACCEPT and DROP, so we'd get double-free.  Hope that makes
> sense.
>
> Thanks!

Makes sense to me, thanks!
Aaron Conole July 3, 2024, 11:22 p.m. UTC | #4
Florian Westphal <fw@strlen.de> writes:

> At this time, conntrack either returns NF_ACCEPT or NF_DROP.
> To improve debugging it would be nice to be able to replace the NF_DROP
> verdict with the NF_DROP_REASON() helper.
>
> This helper releases the skb instantly (so drop_monitor can pinpoint
> precise location) and returns NF_STOLEN.
>
> Prepare call sites to deal with this before introducing such changes
> in conntrack and nat core.
>
> Signed-off-by: Florian Westphal <fw@strlen.de>
> ---

Reviewed-by: Aaron Conole <aconole@redhat.com>
patchwork-bot+netdevbpf@kernel.org July 5, 2024, 10:10 a.m. UTC | #5
Hello:

This patch was applied to netdev/net-next.git (main)
by David S. Miller <davem@davemloft.net>:

On Wed,  3 Jul 2024 12:46:34 +0200 you wrote:
> At this time, conntrack either returns NF_ACCEPT or NF_DROP.
> To improve debugging it would be nice to be able to replace the NF_DROP
> verdict with the NF_DROP_REASON() helper.
> 
> This helper releases the skb instantly (so drop_monitor can pinpoint
> precise location) and returns NF_STOLEN.
> 
> [...]

Here is the summary with links:
  - [net-next] openvswitch: prepare for stolen verdict coming from conntrack and nat engine
    https://git.kernel.org/netdev/net-next/c/c7f79f2620b7

You are awesome, thank you!
diff mbox series

Patch

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3b980bf2770b..8eb1d644b741 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -679,6 +679,8 @@  static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
 		action |= BIT(NF_NAT_MANIP_DST);
 
 	err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit);
+	if (err != NF_ACCEPT)
+		return err;
 
 	if (action & BIT(NF_NAT_MANIP_SRC))
 		ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC);
@@ -697,6 +699,22 @@  static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
 }
 #endif
 
+static int verdict_to_errno(unsigned int verdict)
+{
+	switch (verdict & NF_VERDICT_MASK) {
+	case NF_ACCEPT:
+		return 0;
+	case NF_DROP:
+		return -EINVAL;
+	case NF_STOLEN:
+		return -EINPROGRESS;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
 /* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
  * not done already.  Update key with new CT state after passing the packet
  * through conntrack.
@@ -735,7 +753,7 @@  static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 
 		err = nf_conntrack_in(skb, &state);
 		if (err != NF_ACCEPT)
-			return -ENOENT;
+			return verdict_to_errno(err);
 
 		/* Clear CT state NAT flags to mark that we have not yet done
 		 * NAT after the nf_conntrack_in() call.  We can actually clear
@@ -762,9 +780,12 @@  static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		 * the key->ct_state.
 		 */
 		if (info->nat && !(key->ct_state & OVS_CS_F_NAT_MASK) &&
-		    (nf_ct_is_confirmed(ct) || info->commit) &&
-		    ovs_ct_nat(net, key, info, skb, ct, ctinfo) != NF_ACCEPT) {
-			return -EINVAL;
+		    (nf_ct_is_confirmed(ct) || info->commit)) {
+			int err = ovs_ct_nat(net, key, info, skb, ct, ctinfo);
+
+			err = verdict_to_errno(err);
+			if (err)
+				return err;
 		}
 
 		/* Userspace may decide to perform a ct lookup without a helper
@@ -795,9 +816,12 @@  static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		 * - When committing an unconfirmed connection.
 		 */
 		if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
-					      info->commit) &&
-		    nf_ct_helper(skb, ct, ctinfo, info->family) != NF_ACCEPT) {
-			return -EINVAL;
+					      info->commit)) {
+			int err = nf_ct_helper(skb, ct, ctinfo, info->family);
+
+			err = verdict_to_errno(err);
+			if (err)
+				return err;
 		}
 
 		if (nf_ct_protonum(ct) == IPPROTO_TCP &&
@@ -1001,10 +1025,9 @@  static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 	/* This will take care of sending queued events even if the connection
 	 * is already confirmed.
 	 */
-	if (nf_conntrack_confirm(skb) != NF_ACCEPT)
-		return -EINVAL;
+	err = nf_conntrack_confirm(skb);
 
-	return 0;
+	return verdict_to_errno(err);
 }
 
 /* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
@@ -1039,6 +1062,10 @@  int ovs_ct_execute(struct net *net, struct sk_buff *skb,
 	else
 		err = ovs_ct_lookup(net, key, info, skb);
 
+	/* conntrack core returned NF_STOLEN */
+	if (err == -EINPROGRESS)
+		return err;
+
 	skb_push_rcsum(skb, nh_ofs);
 	if (err)
 		ovs_kfree_skb_reason(skb, OVS_DROP_CONNTRACK);