diff mbox series

[bpf-next,v2,1/2] bpf: Add ifindex to bpf_sk_lookup

Message ID 20211104122304.962104-2-markpash@cloudflare.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Get ifindex in BPF_SK_LOOKUP prog type | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 11541 this patch: 11541
netdev/cc_maintainers warning 10 maintainers not CCed: brouer@redhat.com joe@cilium.io revest@chromium.org songliubraving@fb.com jackmanb@google.com yhs@fb.com kafai@fb.com liuhangbin@gmail.com kpsingh@kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 2378 this patch: 2378
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 11262 this patch: 11262
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test
bpf/vmtest-bpf-next-PR success PR summary

Commit Message

Mark Pashmfouroush Nov. 4, 2021, 12:23 p.m. UTC
It may be helpful to have access to the ifindex during bpf socket
lookup. An example may be to scope certain socket lookup logic to
specific interfaces, i.e. an interface may be made exempt from custom
lookup code.

Add the ifindex of the arriving connection to the bpf_sk_lookup API.

Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>

Comments

Yonghong Song Nov. 4, 2021, 6:06 p.m. UTC | #1
On 11/4/21 5:23 AM, Mark Pashmfouroush wrote:
> It may be helpful to have access to the ifindex during bpf socket
> lookup. An example may be to scope certain socket lookup logic to
> specific interfaces, i.e. an interface may be made exempt from custom
> lookup code.
> 
> Add the ifindex of the arriving connection to the bpf_sk_lookup API.
> 
> Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 24b7ed2677af..0012a5176a32 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1374,6 +1374,7 @@ struct bpf_sk_lookup_kern {
>   		const struct in6_addr *daddr;
>   	} v6;
>   	struct sock	*selected_sk;
> +	u32		ifindex;

In struct __sk_buff, we have two ifindex related fields:

         __u32 ingress_ifindex;
         __u32 ifindex;

Does newly-added ifindex corresponds to skb->ingress_ifindex or
skb->ifindex? From comments:
   > +	__u32 ifindex;		/* The arriving interface. Determined by inet_iif. */

looks like it corresponds to ingress? Should be use the name
ingress_ifindex to be consistent with __sk_buff?

>   	bool		no_reuseport;
>   };
>   
> @@ -1436,7 +1437,7 @@ extern struct static_key_false bpf_sk_lookup_enabled;
>   static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
>   					const __be32 saddr, const __be16 sport,
>   					const __be32 daddr, const u16 dport,
> -					struct sock **psk)
> +					const int ifindex, struct sock **psk)
>   {
>   	struct bpf_prog_array *run_array;
>   	struct sock *selected_sk = NULL;
> @@ -1452,6 +1453,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
>   			.v4.daddr	= daddr,
>   			.sport		= sport,
>   			.dport		= dport,
> +			.ifindex	= ifindex,
>   		};
>   		u32 act;
>   
> @@ -1474,7 +1476,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
>   					const __be16 sport,
>   					const struct in6_addr *daddr,
>   					const u16 dport,
> -					struct sock **psk)
> +					const int ifindex, struct sock **psk)
>   {
>   	struct bpf_prog_array *run_array;
>   	struct sock *selected_sk = NULL;
> @@ -1490,6 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
>   			.v6.daddr	= daddr,
>   			.sport		= sport,
>   			.dport		= dport,
> +			.ifindex	= ifindex,
>   		};
>   		u32 act;
>   
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index ba5af15e25f5..5b8618a4d485 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6296,6 +6296,7 @@ struct bpf_sk_lookup {
>   	__u32 local_ip4;	/* Network byte order */
>   	__u32 local_ip6[4];	/* Network byte order */
>   	__u32 local_port;	/* Host byte order */
> +	__u32 ifindex;		/* The arriving interface. Determined by inet_iif. */
>   };
[...]
Jakub Sitnicki Nov. 5, 2021, 2:47 p.m. UTC | #2
On Thu, Nov 04, 2021 at 07:06 PM CET, 'Yonghong Song' via kernel-team+notifications wrote:
> On 11/4/21 5:23 AM, Mark Pashmfouroush wrote:
>> It may be helpful to have access to the ifindex during bpf socket
>> lookup. An example may be to scope certain socket lookup logic to
>> specific interfaces, i.e. an interface may be made exempt from custom
>> lookup code.
>> Add the ifindex of the arriving connection to the bpf_sk_lookup API.
>> Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 24b7ed2677af..0012a5176a32 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -1374,6 +1374,7 @@ struct bpf_sk_lookup_kern {
>>   		const struct in6_addr *daddr;
>>   	} v6;
>>   	struct sock	*selected_sk;
>> +	u32		ifindex;
>
> In struct __sk_buff, we have two ifindex related fields:
>
>         __u32 ingress_ifindex;
>         __u32 ifindex;
>
> Does newly-added ifindex corresponds to skb->ingress_ifindex or
> skb->ifindex? From comments:
>   > +	__u32 ifindex;		/* The arriving interface. Determined by inet_iif. */
>
> looks like it corresponds to ingress? Should be use the name
> ingress_ifindex to be consistent with __sk_buff?
>

On ingress these two (skb->skb_iif and skb->dev-ifindex) are the same,
if I read the code correctly [1].

That said, I agree that ingress_ifindex would be less ambiguous (iif ->
ingress interface, can't get that wrong).

Also, as Yonghong points out __sk_buff and xdp_md context objects
already use this identifier for the same bit of information, so it will
be less of surprise.

[1] https://elixir.bootlin.com/linux/latest/source/net/core/dev.c#L5258

[...]
diff mbox series

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 24b7ed2677af..0012a5176a32 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1374,6 +1374,7 @@  struct bpf_sk_lookup_kern {
 		const struct in6_addr *daddr;
 	} v6;
 	struct sock	*selected_sk;
+	u32		ifindex;
 	bool		no_reuseport;
 };
 
@@ -1436,7 +1437,7 @@  extern struct static_key_false bpf_sk_lookup_enabled;
 static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 					const __be32 saddr, const __be16 sport,
 					const __be32 daddr, const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1452,6 +1453,7 @@  static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 			.v4.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ifindex	= ifindex,
 		};
 		u32 act;
 
@@ -1474,7 +1476,7 @@  static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 					const __be16 sport,
 					const struct in6_addr *daddr,
 					const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1490,6 +1492,7 @@  static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 			.v6.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ifindex	= ifindex,
 		};
 		u32 act;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ba5af15e25f5..5b8618a4d485 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6296,6 +6296,7 @@  struct bpf_sk_lookup {
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ifindex;		/* The arriving interface. Determined by inet_iif. */
 };
 
 /*
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e8d3b49c297..1b83111a996f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10491,6 +10491,7 @@  static bool sk_lookup_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
 	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
 	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+	case bpf_ctx_range(struct bpf_sk_lookup, ifindex):
 		bpf_ctx_record_field_size(info, sizeof(__u32));
 		return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
 
@@ -10580,6 +10581,12 @@  static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
 				      bpf_target_off(struct bpf_sk_lookup_kern,
 						     dport, 2, target_size));
 		break;
+
+	case offsetof(struct bpf_sk_lookup, ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct bpf_sk_lookup_kern,
+						     ifindex, 4, target_size));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 75737267746f..30ab717ff1b8 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -307,7 +307,7 @@  static inline struct sock *inet_lookup_run_bpf(struct net *net,
 					       struct inet_hashinfo *hashinfo,
 					       struct sk_buff *skb, int doff,
 					       __be32 saddr, __be16 sport,
-					       __be32 daddr, u16 hnum)
+					       __be32 daddr, u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -315,8 +315,8 @@  static inline struct sock *inet_lookup_run_bpf(struct net *net,
 	if (hashinfo != &tcp_hashinfo)
 		return NULL; /* only TCP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -340,7 +340,7 @@  struct sock *__inet_lookup_listener(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
-					     saddr, sport, daddr, hnum);
+					     saddr, sport, daddr, hnum, dif);
 		if (result)
 			goto done;
 	}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2fffcf2b54f3..5fceee3de65d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -460,7 +460,7 @@  static struct sock *udp4_lookup_run_bpf(struct net *net,
 					struct udp_table *udptable,
 					struct sk_buff *skb,
 					__be32 saddr, __be16 sport,
-					__be32 daddr, u16 hnum)
+					__be32 daddr, u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -468,8 +468,8 @@  static struct sock *udp4_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -505,7 +505,7 @@  struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp4_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum);
+					 saddr, sport, daddr, hnum, dif);
 		if (sk) {
 			result = sk;
 			goto done;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 67c9114835c8..4514444e96c8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -165,7 +165,7 @@  static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 						const struct in6_addr *saddr,
 						const __be16 sport,
 						const struct in6_addr *daddr,
-						const u16 hnum)
+						const u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -173,8 +173,8 @@  static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 	if (hashinfo != &tcp_hashinfo)
 		return NULL; /* only TCP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -198,7 +198,7 @@  struct sock *inet6_lookup_listener(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
-					      saddr, sport, daddr, hnum);
+					      saddr, sport, daddr, hnum, dif);
 		if (result)
 			goto done;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 12c12619ee35..ea4ea525f94a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -195,7 +195,7 @@  static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 					       const struct in6_addr *saddr,
 					       __be16 sport,
 					       const struct in6_addr *daddr,
-					       u16 hnum)
+					       u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -203,8 +203,8 @@  static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -240,7 +240,7 @@  struct sock *__udp6_lib_lookup(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp6_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum);
+					 saddr, sport, daddr, hnum, dif);
 		if (sk) {
 			result = sk;
 			goto done;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ba5af15e25f5..5b8618a4d485 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6296,6 +6296,7 @@  struct bpf_sk_lookup {
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ifindex;		/* The arriving interface. Determined by inet_iif. */
 };
 
 /*