diff mbox series

[bpf-next,V1,1/2] bpf: BPF-helper for MTU checking add length input

Message ID 161340436275.1234345.10317671988851040002.stgit@firesoul (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Updates for BPF-helper bpf_check_mtu | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 11 maintainers not CCed: daniel@iogearbox.net andrii@kernel.org kuba@kernel.org hawk@kernel.org yhs@fb.com davem@davemloft.net john.fastabend@gmail.com kpsingh@kernel.org ast@kernel.org songliubraving@fb.com kafai@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 12238 this patch: 12238
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 66 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 12886 this patch: 12886
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Jesper Dangaard Brouer Feb. 15, 2021, 3:52 p.m. UTC
The FIB lookup example[1] show how the IP-header field tot_len
(iph->tot_len) is used as input to perform the MTU check.

This patch extend the BPF-helper bpf_check_mtu() with the same ability
to provide the length as user parameter input, via mtu_len parameter.

[1] samples/bpf/xdp_fwd_kern.c

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/uapi/linux/bpf.h |   17 +++++++++++------
 net/core/filter.c        |   12 ++++++++++--
 2 files changed, 21 insertions(+), 8 deletions(-)

Comments

John Fastabend Feb. 17, 2021, 8:20 p.m. UTC | #1
Jesper Dangaard Brouer wrote:
> The FIB lookup example[1] show how the IP-header field tot_len
> (iph->tot_len) is used as input to perform the MTU check.
> 
> This patch extend the BPF-helper bpf_check_mtu() with the same ability
> to provide the length as user parameter input, via mtu_len parameter.
> 
> [1] samples/bpf/xdp_fwd_kern.c
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>  include/uapi/linux/bpf.h |   17 +++++++++++------
>  net/core/filter.c        |   12 ++++++++++--
>  2 files changed, 21 insertions(+), 8 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4c24daa43bac..9c8aa50dc8a5 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3850,8 +3850,7 @@ union bpf_attr {
>   *
>   * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
>   *	Description
> -
> - *		Check ctx packet size against exceeding MTU of net device (based
> + *		Check packet size against exceeding MTU of net device (based
>   *		on *ifindex*).  This helper will likely be used in combination
>   *		with helpers that adjust/change the packet size.
>   *
> @@ -3868,6 +3867,14 @@ union bpf_attr {
>   *		against the current net device.  This is practical if this isn't
>   *		used prior to redirect.
>   *
> + *		On input *mtu_len* must be a valid pointer, else verifier will
> + *		reject BPF program.  If the value *mtu_len* is initialized to
> + *		zero then the ctx packet size is use.  When value *mtu_len* is
> + *		provided as input this specify the L3 length that the MTU check
> + *		is done against. Remeber XDP and TC length operate at L2, but
                                 ^^^^^^^
typo, Remember

Acked-by: John Fastabend <john.fastabend@gmail.com>
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c24daa43bac..9c8aa50dc8a5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3850,8 +3850,7 @@  union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *	Description
-
- *		Check ctx packet size against exceeding MTU of net device (based
+ *		Check packet size against exceeding MTU of net device (based
  *		on *ifindex*).  This helper will likely be used in combination
  *		with helpers that adjust/change the packet size.
  *
@@ -3868,6 +3867,14 @@  union bpf_attr {
  *		against the current net device.  This is practical if this isn't
  *		used prior to redirect.
  *
+ *		On input *mtu_len* must be a valid pointer, else verifier will
+ *		reject BPF program.  If the value *mtu_len* is initialized to
+ *		zero then the ctx packet size is use.  When value *mtu_len* is
+ *		provided as input this specify the L3 length that the MTU check
+ *		is done against. Remeber XDP and TC length operate at L2, but
+ *		this value is L3 as this correlate to MTU and IP-header tot_len
+ *		values which are L3 (similar behavior as bpf_fib_lookup).
+ *
  *		The Linux kernel route table can configure MTUs on a more
  *		specific per route level, which is not provided by this helper.
  *		For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -3892,11 +3899,9 @@  union bpf_attr {
  *
  *		On return *mtu_len* pointer contains the MTU value of the net
  *		device.  Remember the net device configured MTU is the L3 size,
- *		which is returned here and XDP and TX length operate at L2.
+ *		which is returned here and XDP and TC length operate at L2.
  *		Helper take this into account for you, but remember when using
- *		MTU value in your BPF-code.  On input *mtu_len* must be a valid
- *		pointer and be initialized (to zero), else verifier will reject
- *		BPF program.
+ *		MTU value in your BPF-code.
  *
  *	Return
  *		* 0 on success, and populate MTU value in *mtu_len* pointer.
diff --git a/net/core/filter.c b/net/core/filter.c
index 7059cf604d94..fcc3bda85960 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5660,7 +5660,7 @@  BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
 	if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
 		return -EINVAL;
 
-	if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+	if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
 		return -EINVAL;
 
 	dev = __dev_via_ifindex(dev, ifindex);
@@ -5670,7 +5670,11 @@  BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
 	mtu = READ_ONCE(dev->mtu);
 
 	dev_len = mtu + dev->hard_header_len;
-	skb_len = skb->len + len_diff; /* minus result pass check */
+
+	/* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+	skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
+
+	skb_len += len_diff; /* minus result pass check */
 	if (skb_len <= dev_len) {
 		ret = BPF_MTU_CHK_RET_SUCCESS;
 		goto out;
@@ -5715,6 +5719,10 @@  BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
 	/* Add L2-header as dev MTU is L3 size */
 	dev_len = mtu + dev->hard_header_len;
 
+	/* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+	if (*mtu_len)
+		xdp_len = *mtu_len + dev->hard_header_len;
+
 	xdp_len += len_diff; /* minus result pass check */
 	if (xdp_len > dev_len)
 		ret = BPF_MTU_CHK_RET_FRAG_NEEDED;