
[bpf-next,1/2] bpf: Add ifindex to bpf_sk_lookup

Message ID 20211015112336.1973229-2-markpash@cloudflare.com (mailing list archive)
State New
Series Get ifindex in BPF_SK_LOOKUP prog type

Commit Message

Mark Pashmfouroush Oct. 15, 2021, 11:23 a.m. UTC
It may be helpful to have access to the ifindex during bpf socket
lookup. Add this to the bpf_sk_lookup API.

Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>
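
For illustration only (not part of the submitted patch), a minimal SK_LOOKUP
program using the new field might look like the sketch below; the program name
and the ifindex value 2 are placeholders:

/* Sketch only: drop lookups for packets that did not arrive on a given
 * interface. ctx->ifindex is the field added by this patch. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_lookup")
int filter_by_ifindex(struct bpf_sk_lookup *ctx)
{
	if (ctx->ifindex != 2)		/* placeholder ifindex */
		return SK_DROP;

	return SK_PASS;	/* no socket selected: fall back to regular lookup */
}

char _license[] SEC("license") = "GPL";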

Comments

Alexei Starovoitov Oct. 21, 2021, 1:39 a.m. UTC | #1
On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush
<markpash@cloudflare.com> wrote:
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 6fc59d61937a..9bd3e8b8a659 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
>         __u32 local_ip4;        /* Network byte order */
>         __u32 local_ip6[4];     /* Network byte order */
>         __u32 local_port;       /* Host byte order */
> +       __u32 ifindex;          /* Maps to skb->dev->ifindex */

Is the comment accurate?
The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb),
which is skb->skb_iif at this point (I think).
skb->dev->ifindex would typically mean the destination or egress ifindex.
In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them.
If it's really dev->ifindex, then keeping the 'ifindex' name here would be correct,
but looking at how it's populated in inet/udp_lookup makes me wonder
whether it should be named 'ingress_ifindex' instead and the comment clarified.

If/when you resubmit, please trim the Cc list to a minimum.
John Fastabend Oct. 21, 2021, 7 p.m. UTC | #2
Mark Pashmfouroush wrote:
> It may be helpful to have access to the ifindex during bpf socket
> lookup. Add this to the bpf_sk_lookup API.
> 
> Signed-off-by: Mark Pashmfouroush <markpash@cloudflare.com>
> 

Would be nice to have more details on the 'use case' here. I
don't know off-hand how it 'may be helpful'.

For the actual code though LGTM.

Acked-by: John Fastabend <john.fastabend@gmail.com>
John Fastabend Oct. 21, 2021, 7:07 p.m. UTC | #3
Alexei Starovoitov wrote:
> On Fri, Oct 15, 2021 at 4:24 AM Mark Pashmfouroush
> <markpash@cloudflare.com> wrote:
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 6fc59d61937a..9bd3e8b8a659 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -6262,6 +6262,7 @@ struct bpf_sk_lookup {
> >         __u32 local_ip4;        /* Network byte order */
> >         __u32 local_ip6[4];     /* Network byte order */
> >         __u32 local_port;       /* Host byte order */
> > +       __u32 ifindex;          /* Maps to skb->dev->ifindex */
> 
> Is the comment accurate?
> The bpf_sk_lookup_kern ifindex is populated with inet_iif(skb),
> which is skb->skb_iif at this point (I think).
> skb->dev->ifindex would typically mean the destination or egress ifindex.
> In __sk_buff we have 'ifindex' and 'ingress_ifindex' to differentiate them.
> If it's really dev->ifindex, then keeping the 'ifindex' name here would be correct,
> but looking at how it's populated in inet/udp_lookup makes me wonder
> whether it should be named 'ingress_ifindex' instead and the comment clarified.
> 
> If/when you resubmit, please trim the Cc list to a minimum.

At least in the TCP cases it's coming from inet_iif(), which is either rt_iif
from the rtable or skb->skb_iif. Agree it would be nice to fix up the comment.
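
For reference, inet_iif() reads roughly as follows (paraphrased from
include/net/route.h; check the tree for the exact definition):

static inline int inet_iif(const struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);

	/* Prefer the input interface recorded in the route ... */
	if (rt && rt->rt_iif)
		return rt->rt_iif;

	/* ... otherwise fall back to the ingress device index. */
	return skb->skb_iif;
}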

Thanks.

Patch

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 47f80adbe744..54ffd8036be6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1370,6 +1370,7 @@  struct bpf_sk_lookup_kern {
 		const struct in6_addr *daddr;
 	} v6;
 	struct sock	*selected_sk;
+	u32		ifindex;
 	bool		no_reuseport;
 };
 
@@ -1432,7 +1433,7 @@  extern struct static_key_false bpf_sk_lookup_enabled;
 static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 					const __be32 saddr, const __be16 sport,
 					const __be32 daddr, const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1448,6 +1449,7 @@  static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
 			.v4.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ifindex	= ifindex,
 		};
 		u32 act;
 
@@ -1470,7 +1472,7 @@  static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 					const __be16 sport,
 					const struct in6_addr *daddr,
 					const u16 dport,
-					struct sock **psk)
+					const int ifindex, struct sock **psk)
 {
 	struct bpf_prog_array *run_array;
 	struct sock *selected_sk = NULL;
@@ -1486,6 +1488,7 @@  static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 			.v6.daddr	= daddr,
 			.sport		= sport,
 			.dport		= dport,
+			.ifindex	= ifindex,
 		};
 		u32 act;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..9bd3e8b8a659 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6262,6 +6262,7 @@  struct bpf_sk_lookup {
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ifindex;		/* Maps to skb->dev->ifindex */
 };
 
 /*
diff --git a/net/core/filter.c b/net/core/filter.c
index 4bace37a6a44..9514c6bbd117 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10491,6 +10491,7 @@  static bool sk_lookup_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
 	case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
 	case bpf_ctx_range(struct bpf_sk_lookup, local_port):
+	case bpf_ctx_range(struct bpf_sk_lookup, ifindex):
 		bpf_ctx_record_field_size(info, sizeof(__u32));
 		return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
 
@@ -10580,6 +10581,12 @@  static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
 				      bpf_target_off(struct bpf_sk_lookup_kern,
 						     dport, 2, target_size));
 		break;
+
+	case offsetof(struct bpf_sk_lookup, ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      bpf_target_off(struct bpf_sk_lookup_kern,
+						     ifindex, 4, target_size));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..088bb6c27114 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -305,7 +305,7 @@  static inline struct sock *inet_lookup_run_bpf(struct net *net,
 					       struct inet_hashinfo *hashinfo,
 					       struct sk_buff *skb, int doff,
 					       __be32 saddr, __be16 sport,
-					       __be32 daddr, u16 hnum)
+					       __be32 daddr, u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -313,8 +313,8 @@  static inline struct sock *inet_lookup_run_bpf(struct net *net,
 	if (hashinfo != &tcp_hashinfo)
 		return NULL; /* only TCP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -338,7 +338,7 @@  struct sock *__inet_lookup_listener(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
-					     saddr, sport, daddr, hnum);
+					     saddr, sport, daddr, hnum, dif);
 		if (result)
 			goto done;
 	}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a7825a5b842..f4ddfa38449e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -459,7 +459,7 @@  static struct sock *udp4_lookup_run_bpf(struct net *net,
 					struct udp_table *udptable,
 					struct sk_buff *skb,
 					__be32 saddr, __be16 sport,
-					__be32 daddr, u16 hnum)
+					__be32 daddr, u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -467,8 +467,8 @@  static struct sock *udp4_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_UDP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -504,7 +504,7 @@  struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp4_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum);
+					 saddr, sport, daddr, hnum, dif);
 		if (sk) {
 			result = sk;
 			goto done;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 55c290d55605..8d25cb5d124b 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -165,7 +165,7 @@  static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 						const struct in6_addr *saddr,
 						const __be16 sport,
 						const struct in6_addr *daddr,
-						const u16 hnum)
+						const u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -173,8 +173,8 @@  static inline struct sock *inet6_lookup_run_bpf(struct net *net,
 	if (hashinfo != &tcp_hashinfo)
 		return NULL; /* only TCP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -198,7 +198,7 @@  struct sock *inet6_lookup_listener(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
-					      saddr, sport, daddr, hnum);
+					      saddr, sport, daddr, hnum, dif);
 		if (result)
 			goto done;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e505bb007e9f..77ba0917b3ea 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -194,7 +194,7 @@  static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 					       const struct in6_addr *saddr,
 					       __be16 sport,
 					       const struct in6_addr *daddr,
-					       u16 hnum)
+					       u16 hnum, const int dif)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
@@ -202,8 +202,8 @@  static inline struct sock *udp6_lookup_run_bpf(struct net *net,
 	if (udptable != &udp_table)
 		return NULL; /* only UDP is supported */
 
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP,
-					    saddr, sport, daddr, hnum, &sk);
+	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
+					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
@@ -239,7 +239,7 @@  struct sock *__udp6_lib_lookup(struct net *net,
 	/* Lookup redirect from BPF */
 	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
 		sk = udp6_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum);
+					 saddr, sport, daddr, hnum, dif);
 		if (sk) {
 			result = sk;
 			goto done;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fc59d61937a..9bd3e8b8a659 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6262,6 +6262,7 @@  struct bpf_sk_lookup {
 	__u32 local_ip4;	/* Network byte order */
 	__u32 local_ip6[4];	/* Network byte order */
 	__u32 local_port;	/* Host byte order */
+	__u32 ifindex;		/* Maps to skb->dev->ifindex */
 };
 
 /*
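
As a usage note (not part of this series), SK_LOOKUP programs attach to a
network namespace. A rough libbpf sketch, assuming the hypothetical object
file and program name from the example above:

#include <bpf/libbpf.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int netns_fd;

	obj = bpf_object__open_file("sk_lookup_ifindex.bpf.o", NULL);	/* hypothetical name */
	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_name(obj, "filter_by_ifindex");
	if (!prog)
		return 1;

	/* SK_LOOKUP links are scoped to a netns; attach to the current one. */
	netns_fd = open("/proc/self/ns/net", O_RDONLY);
	if (netns_fd < 0)
		return 1;

	link = bpf_program__attach_netns(prog, netns_fd);
	if (libbpf_get_error(link))
		return 1;

	pause();	/* keep the attachment alive for demonstration */
	return 0;
}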