diff mbox series

[bpf-next,v2,1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers

Message ID 20240424030027.3932184-1-brad@faucet.nz (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next,v2,1/2] net: netfilter: Make ct zone opts configurable for bpf ct helpers | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: kadlec@netfilter.org edumazet@google.com hawk@kernel.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc

Commit Message

Brad Cowie April 24, 2024, 3 a.m. UTC
Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
and bpf_{xdp,skb}_ct_lookup.

Signed-off-by: Brad Cowie <brad@faucet.nz>
---
v1 -> v2:
  - Make ct zone flags/dir configurable
---
 net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 36 deletions(-)

Comments

Martin KaFai Lau April 25, 2024, 11:27 p.m. UTC | #1
On 4/23/24 8:00 PM, Brad Cowie wrote:
> Add ct zone id/flags/dir to bpf_ct_opts so that arbitrary ct zones
> can be used for xdp/tc bpf ct helper functions bpf_{xdp,skb}_ct_alloc
> and bpf_{xdp,skb}_ct_lookup.
> 
> Signed-off-by: Brad Cowie <brad@faucet.nz>
> ---
> v1 -> v2:
>    - Make ct zone flags/dir configurable
> ---
>   net/netfilter/nf_conntrack_bpf.c | 97 ++++++++++++++++++++------------
>   1 file changed, 61 insertions(+), 36 deletions(-)
> 
> diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
> index d2492d050fe6..67f73b57089b 100644
> --- a/net/netfilter/nf_conntrack_bpf.c
> +++ b/net/netfilter/nf_conntrack_bpf.c
> @@ -21,41 +21,44 @@
>   /* bpf_ct_opts - Options for CT lookup helpers
>    *
>    * Members:
> - * @netns_id   - Specify the network namespace for lookup
> - *		 Values:
> - *		   BPF_F_CURRENT_NETNS (-1)
> - *		     Use namespace associated with ctx (xdp_md, __sk_buff)
> - *		   [0, S32_MAX]
> - *		     Network Namespace ID
> - * @error      - Out parameter, set for any errors encountered
> - *		 Values:
> - *		   -EINVAL - Passed NULL for bpf_tuple pointer
> - *		   -EINVAL - opts->reserved is not 0
> - *		   -EINVAL - netns_id is less than -1
> - *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
> - *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> - *		   -ENONET - No network namespace found for netns_id
> - *		   -ENOENT - Conntrack lookup could not find entry for tuple
> - *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> - *				   or sizeof(tuple->ipv6)
> - * @l4proto    - Layer 4 protocol
> - *		 Values:
> - *		   IPPROTO_TCP, IPPROTO_UDP
> - * @dir:       - connection tracking tuple direction.
> - * @reserved   - Reserved member, will be reused for more options in future
> - *		 Values:
> - *		   0
> + * @netns_id	  - Specify the network namespace for lookup
> + *		    Values:
> + *		      BPF_F_CURRENT_NETNS (-1)
> + *		        Use namespace associated with ctx (xdp_md, __sk_buff)
> + *		      [0, S32_MAX]
> + *		        Network Namespace ID
> + * @error	  - Out parameter, set for any errors encountered
> + *		    Values:
> + *		      -EINVAL - Passed NULL for bpf_tuple pointer
> + *		      -EINVAL - netns_id is less than -1
> + *		      -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
> + *			        or NF_BPF_CT_OPTS_OLD_SZ (12)
> + *		      -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
> + *		      -ENONET - No network namespace found for netns_id
> + *		      -ENOENT - Conntrack lookup could not find entry for tuple
> + *		      -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
> + *				      or sizeof(tuple->ipv6)
> + * @l4proto	  - Layer 4 protocol
> + *		    Values:
> + *		      IPPROTO_TCP, IPPROTO_UDP
> + * @dir:	  - connection tracking tuple direction.

Please avoid whitespace changes. It is unnecessary code churns.

> + * @ct_zone_id	  - connection tracking zone id.
> + * @ct_zone_flags - connection tracking zone flags.
> + * @ct_zone_dir   - connection tracking zone direction.
>    */
>   struct bpf_ct_opts {
>   	s32 netns_id;
>   	s32 error;
>   	u8 l4proto;
>   	u8 dir;
> -	u8 reserved[2];
> +	u16 ct_zone_id;
> +	u8 ct_zone_flags;
> +	u8 ct_zone_dir;

The size is 16 now with 2 tail paddings.
It needs a "u8 reserved[2];" at the end and need to check its 0.


>   };
>   
>   enum {
> -	NF_BPF_CT_OPTS_SZ = 12,
> +	NF_BPF_CT_OPTS_SZ = 16,
> +	NF_BPF_CT_OPTS_OLD_SZ = 12,

Avoid adding NF_BPF_CT_OPTS_OLD_SZ for now. I don't see how it may be useful.

>   };
>   
>   static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			u32 timeout)
>   {
>   	struct nf_conntrack_tuple otuple, rtuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   
>   	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)

I don't know the details about the dir in ct_zone, so a question: a 0 
ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?

> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {

Better enforce "ct_zone_id == 0" also instead of ignoring it.

> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
>   				GFP_ATOMIC);
>   	if (IS_ERR(ct))
>   		goto out;
> @@ -152,11 +166,13 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   {
>   	struct nf_conntrack_tuple_hash *hash;
>   	struct nf_conntrack_tuple tuple;
> +	struct nf_conntrack_zone ct_zone;
>   	struct nf_conn *ct;
>   	int err;
>   
> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> -	    opts_len != NF_BPF_CT_OPTS_SZ)
> +	if (!opts || !bpf_tuple)
> +		return ERR_PTR(-EINVAL);
> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>   		return ERR_PTR(-EINVAL);
>   	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
>   		return ERR_PTR(-EPROTO);
> @@ -174,7 +190,16 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
>   			return ERR_PTR(-ENONET);
>   	}
>   
> -	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> +		if (opts->ct_zone_dir == 0)
> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> +		nf_ct_zone_init(&ct_zone,
> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> +	} else {
> +		ct_zone = nf_ct_zone_dflt;
> +	}
> +
> +	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
>   	if (opts->netns_id >= 0)
>   		put_net(net);
>   	if (!hash)
> @@ -245,7 +270,7 @@ __bpf_kfunc_start_defs();
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -279,7 +304,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)

Either 16 or 12. Same for below.

>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -312,7 +337,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for allocation (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn___init *
>   bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
> @@ -347,7 +372,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
>    * @opts	- Additional options for lookup (documented above)
>    *		    Cannot be NULL
>    * @opts__sz	- Length of the bpf_ct_opts structure
> - *		    Must be NF_BPF_CT_OPTS_SZ (12)
> + *		    Must be NF_BPF_CT_OPTS_SZ (16)
>    */
>   __bpf_kfunc struct nf_conn *
>   bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
Brad Cowie May 1, 2024, 4:59 a.m. UTC | #2
On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
> On 4/23/24 8:00 PM, Brad Cowie wrote:
> >   };
> >
> >   static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
> > @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> >   			u32 timeout)
> >   {
> >   	struct nf_conntrack_tuple otuple, rtuple;
> > +	struct nf_conntrack_zone ct_zone;
> >   	struct nf_conn *ct;
> >   	int err;
> >
> > -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
> > -	    opts_len != NF_BPF_CT_OPTS_SZ)
> > +	if (!opts || !bpf_tuple)
> > +		return ERR_PTR(-EINVAL);
> > +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
> >   		return ERR_PTR(-EINVAL);
> >
> >   	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
> > @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
> >   			return ERR_PTR(-ENONET);
> >   	}
> >
> > -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
> > +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
> > +		if (opts->ct_zone_dir == 0)
>
> I don't know the details about the dir in ct_zone, so a question: a 0 
> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?

ct_zone_dir is a bitmask that can have two different bits set,
NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).

The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).

If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
always return false which makes nf_ct_zone_id() always return
NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.

I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
to make the behaviour more obvious when a user calls the bpf ct helper
kfuncs while only setting ct_zone_id but not ct_zone_dir.

> > +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
> > +		nf_ct_zone_init(&ct_zone,
> > +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
> > +	} else {
>
> Better enforce "ct_zone_id == 0" also instead of ignoring it.

Could I ask for clarification here, do you mean changing this
else statement to:

+	} else if (opts->ct_zone_id == 0) {

Or should I be setting opts->ct_zone_id = 0 inside the else block?
Martin KaFai Lau May 1, 2024, 10:11 p.m. UTC | #3
On 4/30/24 9:59 PM, Brad Cowie wrote:
> On Fri, 26 Apr 2024 at 11:27, Martin KaFai Lau <martin.lau@linux.dev> wrote:
>> On 4/23/24 8:00 PM, Brad Cowie wrote:
>>>    };
>>>
>>>    static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
>>> @@ -104,11 +107,13 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>>    			u32 timeout)
>>>    {
>>>    	struct nf_conntrack_tuple otuple, rtuple;
>>> +	struct nf_conntrack_zone ct_zone;
>>>    	struct nf_conn *ct;
>>>    	int err;
>>>
>>> -	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
>>> -	    opts_len != NF_BPF_CT_OPTS_SZ)
>>> +	if (!opts || !bpf_tuple)
>>> +		return ERR_PTR(-EINVAL);
>>> +	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
>>>    		return ERR_PTR(-EINVAL);
>>>
>>>    	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
>>> @@ -130,7 +135,16 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
>>>    			return ERR_PTR(-ENONET);
>>>    	}
>>>
>>> -	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
>>> +	if (opts_len == NF_BPF_CT_OPTS_SZ) {
>>> +		if (opts->ct_zone_dir == 0)
>>
>> I don't know the details about the dir in ct_zone, so a question: a 0
>> ct_zone_dir is invalid and can be reused to mean NF_CT_DEFAULT_ZONE_DIR?
> 
> ct_zone_dir is a bitmask that can have two different bits set,
> NF_CT_ZONE_DIR_ORIG (1) and NF_CT_ZONE_DIR_REPL (2).
> 
> The comparison function nf_ct_zone_matches_dir() in nf_conntrack_zones.h
> checks if ct_zone_dir & (1 << ip_conntrack_dir dir). ip_conntrack_dir
> has two possible values IP_CT_DIR_ORIGINAL (0) and IP_CT_DIR_REPLY (1).
> 
> If ct_zone_dir has a value of 0, this makes nf_ct_zone_matches_dir()
> always return false which makes nf_ct_zone_id() always return
> NF_CT_DEFAULT_ZONE_ID instead of the specified ct zone id.
> 
> I chose to override ct_zone_dir here and set NF_CT_DEFAULT_ZONE_DIR (3),
> to make the behaviour more obvious when a user calls the bpf ct helper
> kfuncs while only setting ct_zone_id but not ct_zone_dir.

Ok. make sense.

> 
>>> +			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
>>> +		nf_ct_zone_init(&ct_zone,
>>> +				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
>>> +	} else {
>>
>> Better enforce "ct_zone_id == 0" also instead of ignoring it.
> 
> Could I ask for clarification here, do you mean changing this
> else statement to:
> 
> +	} else if (opts->ct_zone_id == 0) {
> 
> Or should I be setting opts->ct_zone_id = 0 inside the else block?

Testing non zero inside the else and return error instead of silently ignoring 
it and then use nf_ct_zone_dflt:

	} else {
		if (opts->ct_zone_id)
			/* don't ignore the opts->ct_zone_id */
			return ERR_PTR(-EINVAL);
		
		ct_zone = nf_ct_zone_dflt;
	}
diff mbox series

Patch

diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index d2492d050fe6..67f73b57089b 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -21,41 +21,44 @@ 
 /* bpf_ct_opts - Options for CT lookup helpers
  *
  * Members:
- * @netns_id   - Specify the network namespace for lookup
- *		 Values:
- *		   BPF_F_CURRENT_NETNS (-1)
- *		     Use namespace associated with ctx (xdp_md, __sk_buff)
- *		   [0, S32_MAX]
- *		     Network Namespace ID
- * @error      - Out parameter, set for any errors encountered
- *		 Values:
- *		   -EINVAL - Passed NULL for bpf_tuple pointer
- *		   -EINVAL - opts->reserved is not 0
- *		   -EINVAL - netns_id is less than -1
- *		   -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
- *		   -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
- *		   -ENONET - No network namespace found for netns_id
- *		   -ENOENT - Conntrack lookup could not find entry for tuple
- *		   -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
- *				   or sizeof(tuple->ipv6)
- * @l4proto    - Layer 4 protocol
- *		 Values:
- *		   IPPROTO_TCP, IPPROTO_UDP
- * @dir:       - connection tracking tuple direction.
- * @reserved   - Reserved member, will be reused for more options in future
- *		 Values:
- *		   0
+ * @netns_id	  - Specify the network namespace for lookup
+ *		    Values:
+ *		      BPF_F_CURRENT_NETNS (-1)
+ *		        Use namespace associated with ctx (xdp_md, __sk_buff)
+ *		      [0, S32_MAX]
+ *		        Network Namespace ID
+ * @error	  - Out parameter, set for any errors encountered
+ *		    Values:
+ *		      -EINVAL - Passed NULL for bpf_tuple pointer
+ *		      -EINVAL - netns_id is less than -1
+ *		      -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (16)
+ *			        or NF_BPF_CT_OPTS_OLD_SZ (12)
+ *		      -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
+ *		      -ENONET - No network namespace found for netns_id
+ *		      -ENOENT - Conntrack lookup could not find entry for tuple
+ *		      -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
+ *				      or sizeof(tuple->ipv6)
+ * @l4proto	  - Layer 4 protocol
+ *		    Values:
+ *		      IPPROTO_TCP, IPPROTO_UDP
+ * @dir:	  - connection tracking tuple direction.
+ * @ct_zone_id	  - connection tracking zone id.
+ * @ct_zone_flags - connection tracking zone flags.
+ * @ct_zone_dir   - connection tracking zone direction.
  */
 struct bpf_ct_opts {
 	s32 netns_id;
 	s32 error;
 	u8 l4proto;
 	u8 dir;
-	u8 reserved[2];
+	u16 ct_zone_id;
+	u8 ct_zone_flags;
+	u8 ct_zone_dir;
 };
 
 enum {
-	NF_BPF_CT_OPTS_SZ = 12,
+	NF_BPF_CT_OPTS_SZ = 16,
+	NF_BPF_CT_OPTS_OLD_SZ = 12,
 };
 
 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
@@ -104,11 +107,13 @@  __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			u32 timeout)
 {
 	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple)
+		return ERR_PTR(-EINVAL);
+	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
 		return ERR_PTR(-EINVAL);
 
 	if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
@@ -130,7 +135,16 @@  __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
 			return ERR_PTR(-ENONET);
 	}
 
-	ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
+	if (opts_len == NF_BPF_CT_OPTS_SZ) {
+		if (opts->ct_zone_dir == 0)
+			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+		nf_ct_zone_init(&ct_zone,
+				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+	} else {
+		ct_zone = nf_ct_zone_dflt;
+	}
+
+	ct = nf_conntrack_alloc(net, &ct_zone, &otuple, &rtuple,
 				GFP_ATOMIC);
 	if (IS_ERR(ct))
 		goto out;
@@ -152,11 +166,13 @@  static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 {
 	struct nf_conntrack_tuple_hash *hash;
 	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone ct_zone;
 	struct nf_conn *ct;
 	int err;
 
-	if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
-	    opts_len != NF_BPF_CT_OPTS_SZ)
+	if (!opts || !bpf_tuple)
+		return ERR_PTR(-EINVAL);
+	if (!(opts_len == NF_BPF_CT_OPTS_SZ || opts_len == NF_BPF_CT_OPTS_OLD_SZ))
 		return ERR_PTR(-EINVAL);
 	if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
 		return ERR_PTR(-EPROTO);
@@ -174,7 +190,16 @@  static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
 			return ERR_PTR(-ENONET);
 	}
 
-	hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
+	if (opts_len == NF_BPF_CT_OPTS_SZ) {
+		if (opts->ct_zone_dir == 0)
+			opts->ct_zone_dir = NF_CT_DEFAULT_ZONE_DIR;
+		nf_ct_zone_init(&ct_zone,
+				opts->ct_zone_id, opts->ct_zone_dir, opts->ct_zone_flags);
+	} else {
+		ct_zone = nf_ct_zone_dflt;
+	}
+
+	hash = nf_conntrack_find_get(net, &ct_zone, &tuple);
 	if (opts->netns_id >= 0)
 		put_net(net);
 	if (!hash)
@@ -245,7 +270,7 @@  __bpf_kfunc_start_defs();
  * @opts	- Additional options for allocation (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn___init *
 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -279,7 +304,7 @@  bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for lookup (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn *
 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -312,7 +337,7 @@  bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for allocation (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn___init *
 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
@@ -347,7 +372,7 @@  bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
  * @opts	- Additional options for lookup (documented above)
  *		    Cannot be NULL
  * @opts__sz	- Length of the bpf_ct_opts structure
- *		    Must be NF_BPF_CT_OPTS_SZ (12)
+ *		    Must be NF_BPF_CT_OPTS_SZ (16)
  */
 __bpf_kfunc struct nf_conn *
 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,