@@ -3718,6 +3718,50 @@ union bpf_attr {
* never return NULL.
* Return
* A pointer pointing to the kernel percpu variable on this cpu.
+ *
+ * int bpf_mtu_check(void *ctx, u32 ifindex, u32 *mtu_result, s32 len_diff, u64 flags)
+ * Description
+ * Check ctx packet size against MTU of net device (based on
+ * *ifindex*). This helper will likely be used in combination with
+ * helpers that adjust/change the packet size. The argument
+ * *len_diff* can be used for querying with a planned size
+ * change. This allows to check MTU prior to changing packet ctx.
+ *
+ * The Linux kernel route table can configure MTUs on a more
+ * specific per route level, which is not provided by this helper.
+ * For route level MTU checks use the **bpf_fib_lookup**\ ()
+ * helper.
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** for tc cls_act programs.
+ *
+ * The *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_MTU_CHK_RELAX**
+ * This flag relax or increase the MTU with room for one
+ * VLAN header (4 bytes) and take into account net device
+ * hard_header_len. This relaxation is also used by the
+ * kernels own forwarding MTU checks.
+ *
+ * **BPF_MTU_CHK_GSO**
+ * This flag will only works for *ctx* **struct sk_buff**.
+ * If packet context contains extra packet segment buffers
+ * (often knows as frags), then those are also checked
+ * against the MTU size.
+ *
+ * Return
+ * * 0 on success, and populate MTU value in *mtu_result* pointer.
+ *
+ * * < 0 if any input argument is invalid (*mtu_result* not updated)
+ *
+ * MTU violations return positive values, but also populate MTU
+ * value in *mtu_result* pointer, as this can be needed for
+ * implemeting PMTU handing:
+ *
+ * * **BPF_MTU_CHK_RET_FRAG_NEEDED**
+ * * **BPF_MTU_CHK_RET_GSO_TOOBIG**
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3875,6 +3919,7 @@ union bpf_attr {
FN(redirect_neigh), \
FN(bpf_per_cpu_ptr), \
FN(bpf_this_cpu_ptr), \
+ FN(mtu_check), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4889,6 +4934,18 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
+/* bpf_mtu_check flags*/
+enum bpf_mtu_check_flags {
+ BPF_MTU_CHK_RELAX = (1U << 0),
+ BPF_MTU_CHK_GSO = (1U << 1),
+};
+
+enum bpf_mtu_check_ret {
+ BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */
+ BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_MTU_CHK_RET_GSO_TOOBIG, /* GSO re-segmentation needed to fwd */
+};
+
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
@@ -5512,6 +5512,119 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int bpf_mtu_lookup(struct net *netns, u32 ifindex, u64 flags)
+{
+ struct net_device *dev;
+ int mtu;
+
+ dev = dev_get_by_index_rcu(netns, ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mtu = dev->mtu;
+
+ /* Same relax as xdp_ok_fwd_dev() and is_skb_forwardable() */
+ if (flags & BPF_MTU_CHK_RELAX)
+ mtu += dev->hard_header_len + VLAN_HLEN;
+
+ return mtu;
+}
+
+static unsigned int __bpf_len_adjust_positive(unsigned int len, int len_diff)
+{
+ int len_new = len + len_diff; /* notice len_diff can be negative */
+
+ if (len_new > 0)
+ return len_new;
+
+ return 0;
+}
+
+BPF_CALL_5(bpf_skb_mtu_check, struct sk_buff *, skb,
+ u32, ifindex, u32 *, mtu_result, s32, len_diff, u64, flags)
+{
+ struct net *netns = dev_net(skb->dev);
+ int ret = BPF_MTU_CHK_RET_SUCCESS;
+ unsigned int len = skb->len;
+ int mtu;
+
+ if (flags & ~(BPF_MTU_CHK_RELAX|BPF_MTU_CHK_GSO))
+ return -EINVAL;
+
+ mtu = bpf_mtu_lookup(netns, ifindex, flags);
+ if (unlikely(mtu < 0))
+ return mtu; /* errno */
+
+ len = __bpf_len_adjust_positive(len, len_diff);
+ if (len > mtu) {
+ ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+ goto out;
+ }
+
+ if (flags & BPF_MTU_CHK_GSO &&
+ skb_is_gso(skb) &&
+ skb_gso_validate_network_len(skb, mtu)) {
+ ret = BPF_MTU_CHK_RET_GSO_TOOBIG;
+ goto out;
+ }
+
+out:
+ if (mtu_result)
+ *mtu_result = mtu;
+
+ return ret;
+}
+
+BPF_CALL_5(bpf_xdp_mtu_check, struct xdp_buff *, xdp,
+ u32, ifindex, u32 *, mtu_result, s32, len_diff, u64, flags)
+{
+ unsigned int len = xdp->data_end - xdp->data;
+ struct net_device *dev = xdp->rxq->dev;
+ struct net *netns = dev_net(dev);
+ int ret = BPF_MTU_CHK_RET_SUCCESS;
+ int mtu;
+
+ /* XDP variant doesn't support multi-buffer segment check (yet) */
+ if (flags & ~BPF_MTU_CHK_RELAX)
+ return -EINVAL;
+
+ mtu = bpf_mtu_lookup(netns, ifindex, flags);
+ if (unlikely(mtu < 0))
+ return mtu; /* errno */
+
+ len = __bpf_len_adjust_positive(len, len_diff);
+ if (len > mtu) {
+ ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
+ goto out;
+ }
+out:
+ if (mtu_result)
+ *mtu_result = mtu;
+
+ return ret;}
+
+static const struct bpf_func_proto bpf_skb_mtu_check_proto = {
+ .func = bpf_skb_mtu_check,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto bpf_xdp_mtu_check_proto = {
+ .func = bpf_xdp_mtu_check,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_MEM,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
{
@@ -7075,6 +7188,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_uid_proto;
case BPF_FUNC_fib_lookup:
return &bpf_skb_fib_lookup_proto;
+ case BPF_FUNC_mtu_check:
+ return &bpf_skb_mtu_check_proto;
case BPF_FUNC_sk_fullsock:
return &bpf_sk_fullsock_proto;
case BPF_FUNC_sk_storage_get:
@@ -7144,6 +7259,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
+ case BPF_FUNC_mtu_check:
+ return &bpf_xdp_mtu_check_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_udp:
return &bpf_xdp_sk_lookup_udp_proto;
This BPF-helper bpf_mtu_check() works for both XDP and TC-BPF programs. The API is designed to help the BPF-programmer, that want to do packet context size changes, which involves other helpers. These other helpers usually does a delta size adjustment. This helper also support a delta size (len_diff), which allow BPF-programmer to reuse arguments needed by these other helpers, and perform the MTU check prior to doing any actual size adjustment of the packet context. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> --- include/uapi/linux/bpf.h | 57 ++++++++++++++++++++++ net/core/filter.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+)