From patchwork Fri Feb 7 10:49:24 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Geliang Tang X-Patchwork-Id: 13964784 X-Patchwork-Delegate: matthieu.baerts@tessares.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 660592B9B9 for ; Fri, 7 Feb 2025 10:49:36 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925377; cv=none; b=rWDWnEFVN5EiXZWAlpFN70ZoIiHt5b4IVDKO9upjatIsTZnPK03ngTpfI1OOXd9RFiK3Qb5xDCX3vvWPJX1DUeAaVOE3b2drDHGFnv8XUnnKK7tQmNgThhwFTEX6DqUovNDB70IKrj8BAK6qui+ECD20OVBZKJ7HHlpZPJS/u5c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925377; c=relaxed/simple; bh=5dLVV+WaAjRc0+IkEshtURdok0xhhbHnBqAq2GUfkdg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Mrl79BKuWctbViFOauGF/HEqqly0DssBpRBLEROJzJYv08LQYn/O00v4Ayx53FCExvd8Jp8lu5qCYf3PlqS6z4ig0M4x6HuLQUbvdGS5B8UYNYwRUf/eL0TGBD7TKuxOGQpBVc+LV1yNKekcw6ycY9BLneBEvV9dZvuwSYnWV3U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SA8XmgrO; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SA8XmgrO" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BD957C4CED6; Fri, 7 Feb 2025 10:49:35 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1738925376; bh=5dLVV+WaAjRc0+IkEshtURdok0xhhbHnBqAq2GUfkdg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=SA8XmgrOwiwr0vBxpkJFRuBD40vuUjKQgB7ltV4Z4W5oliGfKVcOom7OaxWRUdtuM AHbAJp7niRajY5YSAL+vqnB5zLnm9FxBbxUgwGtK/MepdkBMdvC30OOpbxVlDjuB5l q4A5vmgKyEobgkwnkxO8/KGC89Z3Q8S7YFpolVlysLw8yKTCmou1isoIivwDtq20vf 3f4wDiqgEQxdMZojDPashTcO7+KWyknmwMQm0Y4ac3nUWWKtOyUuHbzr03y+G2Zk1o DanZfH+B1pHs7ylsf4+ONUY4yZ0C3Qtl3zznu6DcM9gnNeH/h3HpV7wI393Ts3dDzE COyKeYMPWUsvA== From: Geliang Tang To: mptcp@lists.linux.dev Cc: Geliang Tang Subject: [PATCH mptcp-next v1 1/5] bpf: Add mptcp path manager struct_ops Date: Fri, 7 Feb 2025 18:49:24 +0800 Message-ID: X-Mailer: git-send-email 2.43.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Geliang Tang This patch implements a new struct bpf_struct_ops for the MPTCP BPF path manager: bpf_mptcp_pm_ops. The BPF path manager is registered and unregistered in .reg and .unreg. Write access is added in .btf_struct_access for some fields of struct mptcp_sock and struct mptcp_pm_addr_entry, including fields of the struct mptcp_addr_info embedded in the latter. This MPTCP BPF path manager implementation is similar to BPF TCP CC, and net/ipv4/bpf_tcp_ca.c was used as a frame of reference for this patch. 
Signed-off-by: Geliang Tang --- net/mptcp/bpf.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 275 insertions(+), 1 deletion(-) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 4854160c25df..2676d6c64908 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -17,9 +17,282 @@ #include "protocol.h" #ifdef CONFIG_BPF_JIT +static struct bpf_struct_ops bpf_mptcp_pm_ops; +static u32 mptcp_sock_id, + mptcp_entry_id; + +/* MPTCP BPF path manager */ + +static const struct bpf_func_proto * +bpf_mptcp_pm_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_sk_storage_get: + return &bpf_sk_storage_get_proto; + case BPF_FUNC_sk_storage_delete: + return &bpf_sk_storage_delete_proto; + default: + return bpf_base_func_proto(func_id, prog); + } +} + +static int bpf_mptcp_pm_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size) +{ + u32 id = reg->btf_id; + size_t end; + + if (id == mptcp_sock_id) { + switch (off) { + case offsetof(struct mptcp_sock, pm.add_addr_signaled): + end = offsetofend(struct mptcp_sock, pm.add_addr_signaled); + break; + case offsetof(struct mptcp_sock, pm.local_addr_used): + end = offsetofend(struct mptcp_sock, pm.local_addr_used); + break; + case offsetof(struct mptcp_sock, pm.subflows): + end = offsetofend(struct mptcp_sock, pm.subflows); + break; + default: + bpf_log(log, "no write support to mptcp_sock at off %d\n", + off); + return -EACCES; + } + } else if (id == mptcp_entry_id) { + switch (off) { + case offsetof(struct mptcp_pm_addr_entry, addr.id): + end = offsetofend(struct mptcp_pm_addr_entry, addr.id); + break; + case offsetof(struct mptcp_pm_addr_entry, addr.family): + end = offsetofend(struct mptcp_pm_addr_entry, addr.family); + break; + case offsetof(struct mptcp_pm_addr_entry, addr.port): + end = offsetofend(struct mptcp_pm_addr_entry, addr.port); + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case 
offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]): + end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[0]); + break; + case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]): + end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[1]); + break; + case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]): + end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[2]); + break; + case offsetof(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]): + end = offsetofend(struct mptcp_pm_addr_entry, addr.addr6.s6_addr32[3]); + break; +#else + case offsetof(struct mptcp_pm_addr_entry, addr.addr.s_addr): + end = offsetofend(struct mptcp_pm_addr_entry, addr.addr.s_addr); + break; +#endif + case offsetof(struct mptcp_pm_addr_entry, flags): + end = offsetofend(struct mptcp_pm_addr_entry, flags); + break; + case offsetof(struct mptcp_pm_addr_entry, ifindex): + end = offsetofend(struct mptcp_pm_addr_entry, ifindex); + break; + default: + bpf_log(log, "no write support to mptcp_pm_addr_entry at off %d\n", + off); + return -EACCES; + } + } else { + bpf_log(log, "only access to mptcp sock or addr or entry is supported\n"); + return -EACCES; + } + + if (off + size > end) { + bpf_log(log, "access beyond %s at off %u size %u ended at %zu", + id == mptcp_sock_id ? "mptcp_sock" : + (id == mptcp_entry_id ? 
"mptcp_pm_addr_entry" : "mptcp_addr_info"), + off, size, end); + return -EACCES; + } + + return NOT_INIT; +} + +static const struct bpf_verifier_ops bpf_mptcp_pm_verifier_ops = { + .get_func_proto = bpf_mptcp_pm_get_func_proto, + .is_valid_access = bpf_tracing_btf_ctx_access, + .btf_struct_access = bpf_mptcp_pm_btf_struct_access, +}; + +static int bpf_mptcp_pm_reg(void *kdata, struct bpf_link *link) +{ + return mptcp_pm_register(kdata); +} + +static void bpf_mptcp_pm_unreg(void *kdata, struct bpf_link *link) +{ + mptcp_pm_unregister(kdata); +} + +static int bpf_mptcp_pm_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + return 0; +} + +static int bpf_mptcp_pm_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct mptcp_pm_ops *upm; + struct mptcp_pm_ops *pm; + u32 moff; + + upm = (const struct mptcp_pm_ops *)udata; + pm = (struct mptcp_pm_ops *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + switch (moff) { + case offsetof(struct mptcp_pm_ops, type): + pm->type = upm->type; + return 1; + } + + return 0; +} + +static int bpf_mptcp_pm_init(struct btf *btf) +{ + s32 type_id; + + type_id = btf_find_by_name_kind(btf, "mptcp_sock", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + mptcp_sock_id = type_id; + + type_id = btf_find_by_name_kind(btf, "mptcp_pm_addr_entry", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + mptcp_entry_id = type_id; + + return 0; +} + +static int bpf_mptcp_pm_validate(void *kdata) +{ + return mptcp_pm_validate(kdata); +} + +static int __bpf_mptcp_pm_address_created(struct mptcp_sock *msk) +{ + return 0; +} + +static int __bpf_mptcp_pm_address_established(struct mptcp_sock *msk) +{ + return 0; +} + +static int __bpf_mptcp_pm_address_closed(struct mptcp_sock *msk) +{ + return 0; +} + +static int __bpf_mptcp_pm_address_announced(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *addr) +{ 
+ return 0; +} + +static int __bpf_mptcp_pm_address_removed(struct mptcp_sock *msk, u8 id) +{ + return 0; +} + +static int __bpf_mptcp_pm_subflow_established(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, + struct mptcp_addr_info *remote) +{ + return 0; +} + +static int __bpf_mptcp_pm_subflow_closed(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, + struct mptcp_addr_info *remote) +{ + return 0; +} + +static int __bpf_mptcp_pm_get_local_id(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc) +{ + return 0; +} + +static bool __bpf_mptcp_pm_get_priority(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + return 0; +} + +static int __bpf_mptcp_pm_set_priority(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, + struct mptcp_addr_info *remote) +{ + return 0; +} + +static int __bpf_mptcp_pm_address_listener_created(struct mptcp_sock *msk) +{ + return 0; +} + +static int __bpf_mptcp_pm_address_listener_closed(struct mptcp_sock *msk) +{ + return 0; +} + +static void __bpf_mptcp_pm_init(struct mptcp_sock *msk) +{ +} + +static void __bpf_mptcp_pm_release(struct mptcp_sock *msk) +{ +} + +static struct mptcp_pm_ops __bpf_mptcp_pm_ops = { + .created = __bpf_mptcp_pm_address_created, + .established = __bpf_mptcp_pm_address_established, + .closed = __bpf_mptcp_pm_address_closed, + .address_announced = __bpf_mptcp_pm_address_announced, + .address_removed = __bpf_mptcp_pm_address_removed, + .subflow_established = __bpf_mptcp_pm_subflow_established, + .subflow_closed = __bpf_mptcp_pm_subflow_closed, + .get_local_id = __bpf_mptcp_pm_get_local_id, + .get_priority = __bpf_mptcp_pm_get_priority, + .set_priority = __bpf_mptcp_pm_set_priority, + .listener_created = __bpf_mptcp_pm_address_listener_created, + .listener_closed = __bpf_mptcp_pm_address_listener_closed, + .init = __bpf_mptcp_pm_init, + .release = __bpf_mptcp_pm_release, +}; + +static struct bpf_struct_ops bpf_mptcp_pm_ops = { + .verifier_ops = &bpf_mptcp_pm_verifier_ops, + 
.reg = bpf_mptcp_pm_reg, + .unreg = bpf_mptcp_pm_unreg, + .check_member = bpf_mptcp_pm_check_member, + .init_member = bpf_mptcp_pm_init_member, + .init = bpf_mptcp_pm_init, + .validate = bpf_mptcp_pm_validate, + .name = "mptcp_pm_ops", + .cfi_stubs = &__bpf_mptcp_pm_ops, +}; + static struct bpf_struct_ops bpf_mptcp_sched_ops; static const struct btf_type *mptcp_sock_type, *mptcp_subflow_type __read_mostly; -static u32 mptcp_sock_id, mptcp_subflow_id; +static u32 mptcp_subflow_id; static const struct bpf_func_proto * bpf_mptcp_sched_get_func_proto(enum bpf_func_id func_id, @@ -424,6 +697,7 @@ static int __init bpf_mptcp_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_mptcp_sched_kfunc_set); #ifdef CONFIG_BPF_JIT + ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_pm_ops, mptcp_pm_ops); ret = ret ?: register_bpf_struct_ops(&bpf_mptcp_sched_ops, mptcp_sched_ops); #endif From patchwork Fri Feb 7 10:49:25 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Geliang Tang X-Patchwork-Id: 13964785 X-Patchwork-Delegate: matthieu.baerts@tessares.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E40492B9B9 for ; Fri, 7 Feb 2025 10:49:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925379; cv=none; b=rrhraz3rNxykRof+/gPy2DQHBilbqtFTHsaB71wQayAcuFxxLVee4DMYJsmMUBECLYcD4TdHdfuSC2bPRNOMwcSqNPonLukklk4x63loeYCVjTUuVEM1yGpjCUl/5MhihsR0upK4pS50WV68SwR4aRznd91bAEa8G5R4lYV3j3U= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925379; c=relaxed/simple; bh=LWVquMCImtYFMCEXSDHJPCOgZQAcQQbXkcbEPXFPJqw=; 
h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=IPuZxLjtk1iZENZvqG+YP8E5dq6yW2jQ9dQQblSAda0EWYRG5Sn7Jxa6ufV6lWyFkVq0XV2HcoGVdX1U4vo0oeXHH2E68KapNviGZ1KIel4WD2VmuLcSiJRlo0ZKYh7ATPxveKMY/3pZ46pwJIzxSPgbsQMBK4OZOo+n8A3jH5Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Dnpvh5zU; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Dnpvh5zU" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7E332C4CED1; Fri, 7 Feb 2025 10:49:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1738925378; bh=LWVquMCImtYFMCEXSDHJPCOgZQAcQQbXkcbEPXFPJqw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Dnpvh5zUEoigLX/xJKERy928bo4i98mVZYIY48hwnYbyEWlKQagaapInczaPTJr1S EK778KmjX+nPDB+yEPlVJU6S0/EJ9RPX5xeHN2ZuReq0k47EJSOL9HKyQabZHv6U2u r8v6Ox7Bee2xbPDdkvvboLssm+2veKUrDzLRuBgkPrqqxL/3S5Mu84Edmsslpt/f9+ FuDKDNOW1a1kXHdkMHEusWQmqTxIsn0fYeVevgnbkU9VqEnzYJtDPEUu0i0hWHGDmA AxohgQeA3w1rCwrCPxwkDUf9eE1t/IPwrkU6BDhBKxB8JA1l+d/efN6+mPI1tEFnuc Ea7bRcVcL0ymQ== From: Geliang Tang To: mptcp@lists.linux.dev Cc: Geliang Tang Subject: [PATCH mptcp-next v1 2/5] bpf: Register mptcp struct_ops kfunc set Date: Fri, 7 Feb 2025 18:49:25 +0800 Message-ID: <632c93b22ec0ec39fd65c4ca9f6cea359e8d885f.1738924875.git.tanggeliang@kylinos.cn> X-Mailer: git-send-email 2.43.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Geliang Tang This patch exports MPTCP path manager helpers to BPF as kfuncs, adds these kfunc names to the common kfunc set (bpf_mptcp_common_kfunc_set), and registers this set for the BPF_PROG_TYPE_STRUCT_OPS program type. 
Signed-off-by: Geliang Tang --- net/mptcp/bpf.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c index 2676d6c64908..1f963c860f2e 100644 --- a/net/mptcp/bpf.c +++ b/net/mptcp/bpf.c @@ -15,6 +15,7 @@ #include #include #include "protocol.h" +#include "mib.h" #ifdef CONFIG_BPF_JIT static struct bpf_struct_ops bpf_mptcp_pm_ops; @@ -521,6 +522,15 @@ bpf_mptcp_subflow_ctx(const struct sock *sk) return NULL; } +__bpf_kfunc static struct sock * +bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + if (!subflow) + return NULL; + + return mptcp_subflow_tcp_sock(subflow); +} + __bpf_kfunc static int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, struct mptcp_sock *msk) @@ -644,6 +654,71 @@ __bpf_kfunc static bool bpf_ipv4_is_private_10(__be32 addr) return ipv4_is_private_10(addr); } +__bpf_kfunc static void bpf_list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + list_add_tail_rcu(new, head); +} + +__bpf_kfunc static void bpf_list_del_rcu(struct list_head *entry) +{ + list_del_rcu(entry); +} + +__bpf_kfunc static struct mptcp_pm_addr_entry * +bpf_sock_kmalloc_entry(struct sock *sk, int size, gfp_t priority) +{ + return sock_kmalloc(sk, size, priority); +} + +__bpf_kfunc static void +bpf_sock_kfree_entry(struct sock *sk, struct mptcp_pm_addr_entry *entry, + int size) +{ + sock_kfree_s(sk, entry, size); +} + +__bpf_kfunc static void bpf_bitmap_zero(unsigned long *dst__ign, unsigned int nbits) +{ + bitmap_zero(dst__ign, nbits); +} + +__bpf_kfunc static void bpf_set_bit(unsigned long nr, unsigned long *addr__ign) +{ + __set_bit(nr, addr__ign); +} + +__bpf_kfunc static __u8 bpf_find_next_zero_bit(const unsigned long *addr__ign, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr__ign, size, offset); +} + +__bpf_kfunc static int +bpf_mptcp_subflow_connect(struct sock *sk, + const struct mptcp_pm_addr_entry 
*entry, + const struct mptcp_addr_info *remote) +{ + struct mptcp_pm_local local; + + local.addr = entry->addr; + local.flags = entry->flags; + local.ifindex = entry->ifindex; + + return __mptcp_subflow_connect(sk, &local, remote); +} + +__bpf_kfunc static struct net *bpf_sock_net(const struct sock *sk) +{ + return sock_net(sk); +} + +__bpf_kfunc static void BPF_MPTCP_INC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + MPTCP_INC_STATS(net, field); +} + __bpf_kfunc static bool bpf_mptcp_subflow_queues_empty(struct sock *sk) { return tcp_rtx_queue_empty(sk); @@ -653,6 +728,7 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(bpf_mptcp_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_mptcp_subflow_ctx, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_tcp_sock, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_mptcp_subflow_destroy, KF_ITER_DESTROY) @@ -662,6 +738,24 @@ BTF_ID_FLAGS(func, bpf_iter_mptcp_userspace_pm_addr_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_spin_lock_bh) BTF_ID_FLAGS(func, bpf_spin_unlock_bh) BTF_ID_FLAGS(func, bpf_ipv4_is_private_10) +BTF_ID_FLAGS(func, bpf_list_add_tail_rcu) +BTF_ID_FLAGS(func, bpf_list_del_rcu) +BTF_ID_FLAGS(func, bpf_sock_kmalloc_entry) +BTF_ID_FLAGS(func, bpf_sock_kfree_entry) +BTF_ID_FLAGS(func, mptcp_pm_alloc_anno_list) +BTF_ID_FLAGS(func, mptcp_pm_announce_addr) +BTF_ID_FLAGS(func, mptcp_pm_nl_addr_send_ack, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_bitmap_zero) +BTF_ID_FLAGS(func, bpf_set_bit) +BTF_ID_FLAGS(func, bpf_find_next_zero_bit) +BTF_ID_FLAGS(func, mptcp_pm_remove_addr) +BTF_ID_FLAGS(func, mptcp_pm_remove_addr_entry, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_mptcp_subflow_connect, KF_SLEEPABLE) +BTF_ID_FLAGS(func, mptcp_subflow_shutdown, KF_SLEEPABLE) +BTF_ID_FLAGS(func, mptcp_close_ssk, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_sock_net) +BTF_ID_FLAGS(func, 
BPF_MPTCP_INC_STATS) +BTF_ID_FLAGS(func, mptcp_pm_nl_mp_prio_send_ack, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_mptcp_sock_acquire, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_mptcp_sock_release, KF_RELEASE) BTF_KFUNCS_END(bpf_mptcp_common_kfunc_ids) @@ -694,6 +788,8 @@ static int __init bpf_mptcp_kfunc_init(void) ret = register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCKOPT, &bpf_mptcp_common_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_mptcp_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_mptcp_sched_kfunc_set); #ifdef CONFIG_BPF_JIT From patchwork Fri Feb 7 10:49:26 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Geliang Tang X-Patchwork-Id: 13964786 X-Patchwork-Delegate: matthieu.baerts@tessares.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0FD8C2B9B9 for ; Fri, 7 Feb 2025 10:49:40 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925381; cv=none; b=q5T0tcpDt4RKdsHBtYN+rFofIglTpIx72D7Kr+HrzSAt4HHqioJ7TJQ9iigCK30/IsTTOBSBagDm7XSoYfUxXtgsb6Wz1HsOsrDB1IRq5DAk/cWo5x1djCTHyUELuuQ+gkU6J6FeDxJQ8TBgbCivZQS3Ikow2qsar0LQincIGOA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925381; c=relaxed/simple; bh=tCUPmZC8B4CYwZpaqxRTQNnL9SVCp8aQHUul6PyYCPU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=AsW4ZpbRe4rAzFJ0kDqdE/QH/6WsO0S7m5npvfyEcA+MopX3fCvXr++DRc9uO0cHXCEajMFdH5iI/Lp2lSW1i4VE5a/P+mK8YqR8mlKqQ4HlDr8UauWZBhSBMueRlHsgnB1vM4QydZJ07Jp5099lMu0fuXDfNuiOAKjadhScQJU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=KgTXl0+n; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="KgTXl0+n" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7143CC4CED1; Fri, 7 Feb 2025 10:49:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1738925380; bh=tCUPmZC8B4CYwZpaqxRTQNnL9SVCp8aQHUul6PyYCPU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=KgTXl0+nhoBgIeuDA1PUzeBAU/OgqY/MD15Gh/GM2V8s9AMC2WyJzQ27YvXaHv486 EZxrlPW/JHOLiaD6JOYOlgo3lunl1z5O3DeevE3XWC2H57PQnOxGJPwLQMoGbMsnsI Wy9HWwxBG6tzLT/NjKOjlcdUssWpW8jaa54oQogrjOE5WZiBMan+FNvZIBK7aKyGZA PdzB6Q3CxyK9vtnU9G5zmundKjKZTDeCGjPHZ2a3hVYmoTvyQaZajfUsPmxPaB2BiT 91GH90gZ2jWo0XrQE7dnSmvpgYeU2kf5wtwBey9fAeS05+TnF42Zv6hYb/ZWLET1jj JEoXw7Fe2wXFg== From: Geliang Tang To: mptcp@lists.linux.dev Cc: Geliang Tang Subject: [PATCH mptcp-next v1 3/5] selftests/bpf: Add mptcp userspace pm subtest Date: Fri, 7 Feb 2025 18:49:26 +0800 Message-ID: <816bc05971406156c8f805a0176c4f4d877d05d5.1738924875.git.tanggeliang@kylinos.cn> X-Mailer: git-send-email 2.43.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Geliang Tang To verify that the behavior of the BPF path manager is the same as that of the userspace pm in the kernel, a userspace pm self-test has been added. The BPF path manager subtest added later in this series will also use this test. 
Signed-off-by: Geliang Tang --- .../testing/selftests/bpf/prog_tests/mptcp.c | 160 ++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 3fff78781bac..cbe41bb39603 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -66,6 +66,12 @@ enum mptcp_pm_type { __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, }; +enum mptcp_pm_family { + IPV4 = 0, + IPV4MAPPED, + IPV6, +}; + static const unsigned int total_bytes = 10 * 1024 * 1024; static int duration; @@ -779,6 +785,158 @@ static void test_iters_address(void) close(cgroup_fd); } +static int userspace_pm_add_addr(__u32 token, char *addr, __u8 id) +{ + return SYS_NOFAIL("ip netns exec %s %s ann %s id %u token %u", + NS_TEST, PM_CTL, addr, id, token); +} + +static int userspace_pm_rm_addr(__u32 token, __u8 id) +{ + return SYS_NOFAIL("ip netns exec %s %s rem id %u token %u", + NS_TEST, PM_CTL, id, token); +} + +static int userspace_pm_rm_subflow(__u32 token, char *addr, __u8 id) +{ + bool ipv6 = strstr(addr, ":"); + char line[1024], *str; + __u32 sport, dport; + + if (userspace_pm_get_events_line("type:10", line)) + return -1; + + str = strstr(line, "sport"); + if (!str || sscanf(str, "sport:%u,dport:%u,", &sport, &dport) != 2) { + log_err("rm_subflow error, str=%s\n", str); + return -1; + } + + str = ipv6 ? (strstr(addr, ".") ? 
"::ffff:"ADDR_1 : ADDR6_1) : ADDR_1; + return SYS_NOFAIL("ip netns exec %s %s dsf lip %s lport %u rip %s rport %u token %u", + NS_TEST, PM_CTL, addr, sport, str, dport, token); +} + +static int userspace_pm_set_flags(__u32 token, char *addr, char *flags) +{ + bool ipv6 = strstr(addr, ":"); + char line[1024], *str; + __u32 sport, dport; + + if (userspace_pm_get_events_line("type:10", line)) + return -1; + + str = strstr(line, "sport"); + if (!str || sscanf(str, "sport:%u,dport:%u,", &sport, &dport) != 2) { + log_err("set_flags error, str=%s\n", str); + return -1; + } + + str = ipv6 ? (strstr(addr, ".") ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1; + return SYS_NOFAIL("ip netns exec %s %s set %s port %u rip %s rport %u flags %s token %u", + NS_TEST, PM_CTL, addr, sport, str, dport, flags, token); +} + +static void run_userspace_pm(enum mptcp_pm_family family) +{ + bool ipv4mapped = (family == IPV4MAPPED); + bool ipv6 = (family == IPV6 || ipv4mapped); + int server_fd, client_fd, accept_fd; + __u32 token; + char *addr; + int err; + + addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_1 : ADDR6_1) : ADDR_1; + server_fd = start_mptcp_server(ipv6 ? AF_INET6 : AF_INET, addr, PORT_1, 0); + if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) + return; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + accept_fd = accept(server_fd, NULL, NULL); + if (!ASSERT_OK_FD(accept_fd, "accept")) + goto close_client; + + token = userspace_pm_get_token(client_fd); + if (!token) + goto close_client; + recv_byte(accept_fd); + usleep(200000); /* 0.2s */ + + addr = ipv6 ? (ipv4mapped ? 
"::ffff:"ADDR_2 : ADDR6_2) : ADDR_2; + err = userspace_pm_add_subflow(token, addr, 100); + if (!ASSERT_OK(err, "userspace_pm_add_subflow 100")) + goto close_accept; + + send_byte(accept_fd); + recv_byte(client_fd); + + err = userspace_pm_set_flags(token, addr, "backup"); + if (!ASSERT_OK(err, "userspace_pm_set_flags backup")) + goto close_accept; + + send_byte(client_fd); + recv_byte(accept_fd); + + err = userspace_pm_set_flags(token, addr, "nobackup"); + if (!ASSERT_OK(err, "userspace_pm_set_flags nobackup")) + goto close_accept; + + send_byte(accept_fd); + recv_byte(client_fd); + + err = userspace_pm_rm_subflow(token, addr, 100); + if (!ASSERT_OK(err, "userspace_pm_rm_subflow 100")) + goto close_accept; + + send_byte(client_fd); + recv_byte(accept_fd); + + addr = ipv6 ? (ipv4mapped ? "::ffff:"ADDR_3 : ADDR6_3) : ADDR_3; + err = userspace_pm_add_addr(token, addr, 200); + if (!ASSERT_OK(err, "userspace_pm_add_addr 200")) + goto close_accept; + + send_byte(accept_fd); + recv_byte(client_fd); + + err = userspace_pm_rm_addr(token, 200); + if (!ASSERT_OK(err, "userspace_pm_rm_addr 200")) + goto close_accept; + + send_byte(client_fd); + recv_byte(accept_fd); + +close_accept: + close(accept_fd); +close_client: + close(client_fd); +close_server: + close(server_fd); +} + +static void test_userspace_pm(void) +{ + struct netns_obj *netns; + int err; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + return; + + err = userspace_pm_init(MPTCP_PM_TYPE_USERSPACE); + if (!ASSERT_OK(err, "userspace_pm_init: userspace pm")) + goto fail; + + run_userspace_pm(IPV4MAPPED); + + userspace_pm_cleanup(); +fail: + netns_free(netns); +} + static struct netns_obj *sched_init(char *flags, char *sched) { struct netns_obj *netns; @@ -974,6 +1132,8 @@ void test_mptcp(void) test_iters_subflow(); if (test__start_subtest("iters_address")) test_iters_address(); + if (test__start_subtest("userspace_pm")) + test_userspace_pm(); if (test__start_subtest("default")) 
test_default(); if (test__start_subtest("first")) From patchwork Fri Feb 7 10:49:27 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Geliang Tang X-Patchwork-Id: 13964787 X-Patchwork-Delegate: matthieu.baerts@tessares.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B7E2C2B9B9 for ; Fri, 7 Feb 2025 10:49:42 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925382; cv=none; b=flrwW4+9mVuQQOJX1Jxc192TA3JJhxUCFt6p7ldA9bI3Gryu8zwaiC3wWMocHZj8TYqqJdFSzSTg7zeaqEzEdRhaQQGQg/wiFbl/r9QNohVKVkzurjZHLQArfb175l3w8G2/MuYu8RkuuCNxijorNMwlPaR2h/zehQRqxSxxriw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925382; c=relaxed/simple; bh=TAluxb8dZGbKvspSdFH8CgVcZECfYCHygPJpm7dANwU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=o9pyT1CC6fydFwgE1VBsayOoG0IhRXc9o+Hmn1tyDCvrEQ8G4C7VVCjB1kopXx6//1ShnKQAoPhbvXoCZ+eHJ3M2wpKvQKDPouNVBNvaz27+wFhtBzvoiYkSJETU5Dz1B57nuElFgpYu0XIqLS7FpR2SZj4O0lKdJM5Pr5gNVWE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=k0x5rh3X; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="k0x5rh3X" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1FCD9C4CED6; Fri, 7 Feb 2025 10:49:40 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1738925382; bh=TAluxb8dZGbKvspSdFH8CgVcZECfYCHygPJpm7dANwU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=k0x5rh3XoOMr/lJwpk7uDWbHO0UVVMSti9XYJRvJHWdaS4kwkopPnwpwYF+hWlM0D hTsCAUYyjjnCa2R0mUwQkFMDmIkNqOMDXj8+FDZjxnfZUBTcjgGVMHX6TphLdjRPp5 /Fv3g+RMfzHNYD2+rY1J6f3b4H6K4PjGkTlt+15fkaSGdo5O+9CqFnxcPEr5olGK0h ii9r9fQSsrwoI7NH42IoVoE/Hp+HIuT3VKvszuUQrTsPONg9EEV3kYqJprOgawyfOP bu2a2rqX9X21t9WLdmkklSkMp/4IA8ajK9z2ACz0OUVk6/hd/36mc8tnOLXUrlqNPK E0RQNPlvLfJIQ== From: Geliang Tang To: mptcp@lists.linux.dev Cc: Geliang Tang Subject: [PATCH mptcp-next v1 4/5] selftests/bpf: Implement mptcp pm helpers in BPF Date: Fri, 7 Feb 2025 18:49:27 +0800 Message-ID: <174476b578f6665cc96d03f30ae8b440b84b8b07.1738924875.git.tanggeliang@kylinos.cn> X-Mailer: git-send-email 2.43.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Geliang Tang This patch implements MPTCP path manager helpers mptcp_pm_copy_addr(), mptcp_pm_copy_entry(), ipv6_addr_equal(), mptcp_addresses_equal() and mptcp_pm_find_ssk() in BPF. Signed-off-by: Geliang Tang --- tools/testing/selftests/bpf/progs/mptcp_bpf.h | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index 5e29ac93d823..816917e59995 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -3,6 +3,7 @@ #define __MPTCP_BPF_H__ #include "bpf_experimental.h" +#include "bpf_tracing_net.h" /* mptcp helpers from include/net/mptcp.h */ #define MPTCP_SUBFLOWS_MAX 8 @@ -36,6 +37,11 @@ static inline int list_is_head(const struct list_head *list, #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) +/* errno macros from include/uapi/asm-generic/errno-base.h */ +#define ESRCH 3 /* No such process */ +#define ENOMEM 12 /* Out of Memory */ +#define EINVAL 22 /* Invalid argument */ + static __always_inline struct sock * mptcp_subflow_tcp_sock(const struct 
mptcp_subflow_context *subflow) { @@ -62,4 +68,108 @@ extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, extern struct mptcp_subflow_context * bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym; +/* reimplemented BPF helpers */ +static __always_inline void +mptcp_pm_copy_addr(struct mptcp_addr_info *dst, + struct mptcp_addr_info *src) +{ + dst->id = src->id; + dst->family = src->family; + dst->port = src->port; + + if (src->family == AF_INET) { + dst->addr.s_addr = src->addr.s_addr; + } else if (src->family == AF_INET6) { + dst->addr6.s6_addr32[0] = src->addr6.s6_addr32[0]; + dst->addr6.s6_addr32[1] = src->addr6.s6_addr32[1]; + dst->addr6.s6_addr32[2] = src->addr6.s6_addr32[2]; + dst->addr6.s6_addr32[3] = src->addr6.s6_addr32[3]; + } +} + +static __always_inline void +mptcp_pm_copy_entry(struct mptcp_pm_addr_entry *dst, + struct mptcp_pm_addr_entry *src) +{ + mptcp_pm_copy_addr(&dst->addr, &src->addr); + + dst->flags = src->flags; + dst->ifindex = src->ifindex; +} + +#define inet_sk(ptr) container_of(ptr, struct inet_sock, sk) + +#define ipv6_addr_equal(a, b) ((a).s6_addr32[0] == (b).s6_addr32[0] && \ + (a).s6_addr32[1] == (b).s6_addr32[1] && \ + (a).s6_addr32[2] == (b).s6_addr32[2] && \ + (a).s6_addr32[3] == (b).s6_addr32[3]) + +static __always_inline bool +mptcp_addresses_equal(const struct mptcp_addr_info *a, + const struct mptcp_addr_info *b, bool use_port) +{ + bool addr_equals = false; + + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; + else + addr_equals = ipv6_addr_equal(a->addr6, b->addr6); + } + + if (!addr_equals) + return false; + if (!use_port) + return true; + + return a->port == b->port; +} + +static __always_inline struct sock * +mptcp_pm_find_ssk(struct mptcp_sock *msk, + const struct mptcp_addr_info *local, + const struct mptcp_addr_info *remote) +{ + struct mptcp_subflow_context *subflow; + + if (local->family != 
remote->family) + return NULL; + + bpf_for_each(mptcp_subflow, subflow, msk) { + const struct inet_sock *issk; + struct sock *ssk; + + ssk = bpf_mptcp_subflow_tcp_sock(subflow); + if (!ssk) + continue; + + if (local->family != ssk->sk_family) + continue; + + issk = bpf_core_cast(inet_sk(ssk), struct inet_sock); + + switch (ssk->sk_family) { + case AF_INET: + if (issk->inet_saddr != local->addr.s_addr || + issk->inet_daddr != remote->addr.s_addr) + continue; + break; + case AF_INET6: { + if (!ipv6_addr_equal(local->addr6, issk->pinet6->saddr) || + !ipv6_addr_equal(remote->addr6, ssk->sk_v6_daddr)) + continue; + break; + } + default: + continue; + } + + if (issk->inet_sport == local->port && + issk->inet_dport == remote->port) + return ssk; + } + + return NULL; +} + #endif From patchwork Fri Feb 7 10:49:28 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Geliang Tang X-Patchwork-Id: 13964788 X-Patchwork-Delegate: matthieu.baerts@tessares.net Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C67B82B9B9 for ; Fri, 7 Feb 2025 10:49:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925383; cv=none; b=pridJqObyZyxeGJ49NcWWV8nMiNrgEedfUAyXlvuN6AqaD6v2xenQsKwZwHJx1kPAVKW4ThbmaXX6uE1++FbS0C8FxVoHB8e14gnkNDC2nYzyFqaWt+KEMev7qRdvJli2pI1xhr6/lG0iec9OkuFhqlNMCNED/wq2hUAqxK7aAs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1738925383; c=relaxed/simple; bh=DQNMmzSTs1myTzu7XTVfAjAyJ0+npCjHGqgATb0s+5A=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=McO+PYJdWm1pzpc+Y8UKn4tTacplvESPx0QAjOS2v4nsQMyOMwP58uXKZMo5hPZk+5zipVkqGO/BrBS9qrLOZlMX2TRZ2B735OkhJkeFhjNvaWPp9fRyOxATwaDXqgD+K3xIvHMOR5n8bRSVe4vz00UyW6ZaqNLikkZ35h3hw7k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Q8rpPsY8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Q8rpPsY8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A7456C4CED1; Fri, 7 Feb 2025 10:49:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1738925383; bh=DQNMmzSTs1myTzu7XTVfAjAyJ0+npCjHGqgATb0s+5A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Q8rpPsY8B4dqEOjiJnafxBdpOA3/iqzVQMIQ4WGx8I6rCYdnY1QA2/thqic3aFd4L 83H5dKeP00vwBGS1LSKvG9xXU6kUjW4r9gG/POYyw7mtd+nw3TXApb+ZkEpX5ZtY2m YQ0C36AvGbBK1vdE09m4ptZH5665DVT8mel6jnNo0wFp47H6AN9Ue5eto8kR3eQiAq d0L7tFiT6vWhYIJqligjfGEokM2kbroHqNe5PjgIv+vaJ0tu/F2LwlMxVNpKwZ99qq EGrp9S+T9OSo/R3FXkeUZEgSw9hsnvlBS8sGWeEpeEqpRu0Qgyn3c4kUyXu/dGBTHH wVSBUr4jxRTdQ== From: Geliang Tang To: mptcp@lists.linux.dev Cc: Geliang Tang Subject: [PATCH mptcp-next v1 5/5] selftests/bpf: Add mptcp bpf path manager subtest Date: Fri, 7 Feb 2025 18:49:28 +0800 Message-ID: <87f7786b26b7d0d84938db34871d82293da19e10.1738924875.git.tanggeliang@kylinos.cn> X-Mailer: git-send-email 2.43.0 In-Reply-To: References: Precedence: bulk X-Mailing-List: mptcp@lists.linux.dev List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Geliang Tang This patch adds an mptcp bpf userspace pm example program. It implements the address_announced, address_removed, subflow_established, subflow_closed, get_local_id, get_priority and set_priority interfaces (plus init and release) using almost the same logic as the userspace pm in the kernel. 
Signed-off-by: Geliang Tang --- .../testing/selftests/bpf/prog_tests/mptcp.c | 51 ++++ tools/testing/selftests/bpf/progs/mptcp_bpf.h | 75 +++++ .../bpf/progs/mptcp_bpf_userspace_pm.c | 275 ++++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index cbe41bb39603..bed6d2dda337 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -12,6 +12,7 @@ #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" #include "mptcp_bpf_iters.skel.h" +#include "mptcp_bpf_userspace_pm.skel.h" #include "mptcp_bpf_first.skel.h" #include "mptcp_bpf_bkup.skel.h" #include "mptcp_bpf_rr.skel.h" @@ -61,6 +62,7 @@ enum mptcp_pm_type { MPTCP_PM_TYPE_KERNEL = 0, MPTCP_PM_TYPE_USERSPACE, + MPTCP_PM_TYPE_BPF_USERSPACE, __MPTCP_PM_TYPE_NR, __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1, @@ -937,6 +939,53 @@ static void test_userspace_pm(void) netns_free(netns); } +static void test_bpf_path_manager(void) +{ + struct mptcp_bpf_userspace_pm *skel; + struct netns_obj *netns; + int err; + + skel = mptcp_bpf_userspace_pm__open(); + if (!ASSERT_OK_PTR(skel, "open: userspace_pm")) + return; + + err = bpf_program__set_flags(skel->progs.mptcp_userspace_pm_address_announced, + BPF_F_SLEEPABLE); + err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_address_removed, + BPF_F_SLEEPABLE); + err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_subflow_established, + BPF_F_SLEEPABLE); + err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_subflow_closed, + BPF_F_SLEEPABLE); + err = err ?: bpf_program__set_flags(skel->progs.mptcp_userspace_pm_set_priority, + BPF_F_SLEEPABLE); + if (!ASSERT_OK(err, "set sleepable flags")) + goto skel_destroy; + + if (!ASSERT_OK(mptcp_bpf_userspace_pm__load(skel), "load: userspace_pm")) + goto 
skel_destroy; + + err = mptcp_bpf_userspace_pm__attach(skel); + if (!ASSERT_OK(err, "attach: userspace_pm")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) + goto skel_destroy; + + err = userspace_pm_init(MPTCP_PM_TYPE_BPF_USERSPACE); + if (!ASSERT_OK(err, "userspace_pm_init: bpf pm")) + goto close_netns; + + run_userspace_pm(skel->kconfig->CONFIG_MPTCP_IPV6 ? IPV6 : IPV4); + + userspace_pm_cleanup(); +close_netns: + netns_free(netns); +skel_destroy: + mptcp_bpf_userspace_pm__destroy(skel); +} + static struct netns_obj *sched_init(char *flags, char *sched) { struct netns_obj *netns; @@ -1134,6 +1183,8 @@ void test_mptcp(void) test_iters_address(); if (test__start_subtest("userspace_pm")) test_userspace_pm(); + if (test__start_subtest("bpf_path_manager")) + test_bpf_path_manager(); if (test__start_subtest("default")) test_default(); if (test__start_subtest("first")) diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h index 816917e59995..1abfd033b84f 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -42,6 +42,41 @@ static inline int list_is_head(const struct list_head *list, #define ENOMEM 12 /* Out of Memory */ #define EINVAL 22 /* Invalid argument */ +/* mptcp helpers from include/net/mptcp.h */ +#define U8_MAX ((u8)~0U) + +/* max value of mptcp_addr_info.id */ +#define MPTCP_PM_MAX_ADDR_ID U8_MAX + +/* mptcp macros from include/uapi/linux/mptcp.h */ +#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0) +#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1) +#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2) +#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3) +#define MPTCP_PM_ADDR_FLAG_IMPLICIT (1 << 4) + +/* address families macros from include/linux/socket.h */ +#define AF_UNSPEC 0 +#define AF_INET 2 +#define AF_INET6 10 + +/* shutdown macros from include/net/sock.h */ +#define RCV_SHUTDOWN 1 +#define SEND_SHUTDOWN 2 + +/* 
GFP macros from include/linux/gfp_types.h */ +#define __AC(X,Y) (X##Y) +#define _AC(X,Y) __AC(X,Y) +#define _UL(x) (_AC(x, UL)) +#define UL(x) (_UL(x)) +#define BIT(nr) (UL(1) << (nr)) + +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define __GFP_HIGH ((gfp_t)___GFP_HIGH) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define __GFP_KSWAPD_RECLAIM ((gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) + static __always_inline struct sock * mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) { @@ -62,6 +97,46 @@ extern void bpf_spin_unlock_bh(spinlock_t *lock) __ksym; extern bool bpf_ipv4_is_private_10(__be32 addr) __ksym; +extern struct mptcp_pm_addr_entry * +bpf_sock_kmalloc_entry(struct sock *sk, int size, gfp_t priority) __ksym; +extern void +bpf_sock_kfree_entry(struct sock *sk, struct mptcp_pm_addr_entry *entry, + int size) __ksym; + +extern bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) __ksym; +extern int mptcp_pm_announce_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr, + bool echo) __ksym; +extern void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) __ksym; + +extern void bpf_bitmap_zero(unsigned long *dst, unsigned int nbits) __ksym; +extern void bpf_set_bit(unsigned long nr, unsigned long *addr) __ksym; +extern u8 bpf_find_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) __ksym; + +extern int mptcp_pm_remove_addr(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) __ksym; +extern void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) __ksym; + +extern int bpf_mptcp_subflow_connect(struct sock *sk, + const struct mptcp_pm_addr_entry *entry, + const struct mptcp_addr_info *remote) __ksym; + +extern void +mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) __ksym; +extern void mptcp_close_ssk(struct sock *sk, struct 
sock *ssk, + struct mptcp_subflow_context *subflow) __ksym; +extern struct net *bpf_sock_net(const struct sock *sk) __ksym; +extern void BPF_MPTCP_INC_STATS(struct net *net, + enum linux_mptcp_mib_field field) __ksym; + +extern int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, + struct mptcp_addr_info *addr, + struct mptcp_addr_info *rem, + u8 bkup) __ksym; + extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled) __ksym; diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c new file mode 100644 index 000000000000..477d467a5ece --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_userspace_pm.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; + +extern bool CONFIG_MPTCP_IPV6 __kconfig __weak; + +extern void bpf_list_add_tail_rcu(struct list_head *new, + struct list_head *head) __ksym; +extern void bpf_list_del_rcu(struct list_head *entry) __ksym; + +SEC("struct_ops") +void BPF_PROG(mptcp_userspace_pm_init, struct mptcp_sock *msk) +{ + bpf_printk("BPF userspace PM (%s)", + CONFIG_MPTCP_IPV6 ? 
"IPv6" : "IPv4"); +} + +SEC("struct_ops") +void BPF_PROG(mptcp_userspace_pm_release, struct mptcp_sock *msk) +{ +} + +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_addr_entry *entry; + + bpf_for_each(mptcp_userspace_pm_addr, entry, (struct sock *)msk) { + if (mptcp_addresses_equal(&entry->addr, addr, false)) + return entry; + } + return NULL; +} + +static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry, + bool needs_id) +{ + struct sock *sk = (struct sock *)msk; + unsigned long id_bitmap[4] = { 0 }; + struct mptcp_pm_addr_entry *e; + bool addr_match = false; + bool id_match = false; + int ret = -EINVAL; + + bpf_bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + + bpf_spin_lock_bh(&msk->pm.lock); + bpf_for_each(mptcp_userspace_pm_addr, e, sk) { + addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); + if (addr_match && entry->addr.id == 0 && needs_id) + entry->addr.id = e->addr.id; + id_match = (e->addr.id == entry->addr.id); + if (addr_match || id_match) + break; + bpf_set_bit(e->addr.id, id_bitmap); + } + + if (!addr_match && !id_match) { + /* Memory for the entry is allocated from the + * sock option buffer. 
+ */ + e = bpf_sock_kmalloc_entry(sk, sizeof(*e), GFP_ATOMIC); + if (!e) { + ret = -ENOMEM; + goto append_err; + } + + mptcp_pm_copy_entry(e, entry); + if (!e->addr.id && needs_id) + e->addr.id = bpf_find_next_zero_bit(id_bitmap, + MPTCP_PM_MAX_ADDR_ID + 1, + 1); + bpf_list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list); + msk->pm.local_addr_used++; + ret = e->addr.id; + } else if (addr_match && id_match) { + ret = entry->addr.id; + } + +append_err: + bpf_spin_unlock_bh(&msk->pm.lock); + return ret; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_address_announced, struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local) +{ + int err; + + err = mptcp_userspace_pm_append_new_local_addr(msk, local, false); + if (err < 0) + return err; + + bpf_spin_lock_bh(&msk->pm.lock); + + if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local->addr, false); + mptcp_pm_nl_addr_send_ack(msk); + } + + bpf_spin_unlock_bh(&msk->pm.lock); + + return 0; +} + +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) +{ + struct mptcp_pm_addr_entry *entry; + + bpf_for_each(mptcp_userspace_pm_addr, entry, (struct sock *)msk) { + if (entry->addr.id == id) + return entry; + } + return NULL; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_address_removed, struct mptcp_sock *msk, u8 id) +{ + struct mptcp_pm_addr_entry *entry; + + bpf_spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr_by_id(msk, id); + if (!entry) { + bpf_spin_unlock_bh(&msk->pm.lock); + return -EINVAL; + } + + bpf_list_del_rcu(&entry->list); + bpf_spin_unlock_bh(&msk->pm.lock); + + mptcp_pm_remove_addr_entry(msk, entry); + + bpf_sock_kfree_entry((struct sock *)msk, entry, sizeof(*entry)); + + return 0; +} + +static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *addr) +{ + struct sock *sk = (struct sock 
*)msk; + struct mptcp_pm_addr_entry *entry; + + entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr); + if (!entry) + return -EINVAL; + + bpf_list_del_rcu(&entry->list); + bpf_sock_kfree_entry(sk, entry, sizeof(*entry)); + msk->pm.local_addr_used--; + return 0; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_subflow_established, struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote) +{ + struct sock *sk = (struct sock *)msk; + int err; + + err = mptcp_userspace_pm_append_new_local_addr(msk, local, false); + if (err < 0) + return err; + + err = bpf_mptcp_subflow_connect(sk, local, remote); + bpf_spin_lock_bh(&msk->pm.lock); + if (err) + mptcp_userspace_pm_delete_local_addr(msk, local); + else + msk->pm.subflows++; + bpf_spin_unlock_bh(&msk->pm.lock); + + return err; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_subflow_closed, struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote) +{ + struct sock *ssk, *sk = (struct sock *)msk; + struct mptcp_subflow_context *subflow; + + ssk = mptcp_pm_find_ssk(msk, &local->addr, remote); + if (!ssk) + return -ESRCH; + + subflow = bpf_mptcp_subflow_ctx(ssk); + if (!subflow) + return -EINVAL; + + bpf_spin_lock_bh(&msk->pm.lock); + mptcp_userspace_pm_delete_local_addr(msk, local); + bpf_spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); + mptcp_close_ssk(sk, ssk, subflow); + BPF_MPTCP_INC_STATS(bpf_sock_net(sk), MPTCP_MIB_RMSUBFLOW); + + return 0; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_get_local_id, struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *skc) +{ + struct mptcp_pm_addr_entry *entry; + + bpf_spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr(msk, &skc->addr); + bpf_spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + + return mptcp_userspace_pm_append_new_local_addr(msk, skc, true); +} + +SEC("struct_ops") +bool 
BPF_PROG(mptcp_userspace_pm_get_priority, struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry *entry; + bool backup; + + bpf_spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr(msk, skc); + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + bpf_spin_unlock_bh(&msk->pm.lock); + + return backup; +} + +SEC("struct_ops") +int BPF_PROG(mptcp_userspace_pm_set_priority, struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote) +{ + struct mptcp_pm_addr_entry *entry; + u8 bkup = 0; + + if (local->flags & MPTCP_PM_ADDR_FLAG_BACKUP) + bkup = 1; + + bpf_spin_lock_bh(&msk->pm.lock); + entry = mptcp_userspace_pm_lookup_addr(msk, &local->addr); + if (entry) { + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + } + bpf_spin_unlock_bh(&msk->pm.lock); + + return mptcp_pm_nl_mp_prio_send_ack(msk, &local->addr, remote, bkup); +} + +SEC(".struct_ops.link") +struct mptcp_pm_ops userspace_pm = { + .address_announced = (void *)mptcp_userspace_pm_address_announced, + .address_removed = (void *)mptcp_userspace_pm_address_removed, + .subflow_established = (void *)mptcp_userspace_pm_subflow_established, + .subflow_closed = (void *)mptcp_userspace_pm_subflow_closed, + .get_local_id = (void *)mptcp_userspace_pm_get_local_id, + .get_priority = (void *)mptcp_userspace_pm_get_priority, + .set_priority = (void *)mptcp_userspace_pm_set_priority, + .init = (void *)mptcp_userspace_pm_init, + .release = (void *)mptcp_userspace_pm_release, + .type = MPTCP_PM_TYPE_BPF_USERSPACE, +};