From patchwork Thu Jan 16 17:18:49 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942055 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 2D70222DC5D; Thu, 16 Jan 2025 17:19:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047962; cv=none; b=mlYt6OaN1AxmGf1sIRz1b2IcyW8DKmeKqPfPfu/oyDex5WwgUOIVilGhoO4NaL7k8pyFXcAe209GyBq3fnks+knZZ4uXnk02U93PB1TX+uH7xt5C632tf+Of2+9sW1IMGLRmt2FRvthqjm7T9n9Z5AOYuf4ZCp8vUSvzcNAw6yI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047962; c=relaxed/simple; bh=U+nBK0+llVcRBaMUE8DpCRqKcQW+7j9d1BPr4Uj0mP0=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=lQEI4cKuI53vMBCoClXaZD4fOmwAQ8EIY2t2ulrKgE05jI4SeSQm0R57HeoIslIQnmTRCHcT/DN07KI0PcEfbQwmgrgFTF28cikMftaCz7+LBISfZMToon/bWgZeYOmVmcPHFJnzJ6Y7WIvWwhzjxkLtkslPrjoXsMvlJGQmk+k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 01/14] netfilter: nf_tables: fix set size with rbtree backend Date: Thu, 16 Jan 2025 18:18:49 +0100 Message-Id: <20250116171902.1783620-2-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org The existing rbtree implementation uses singleton elements to represent ranges, however, userspace provides a set size according to the number of ranges in the set. Adjust provided userspace set size to the number of singleton elements in the kernel by multiplying the range by two. Check if the no-match all-zero element is already in the set, in such case release one slot in the set size. Fixes: 0ed6389c483d ("netfilter: nf_tables: rename set implementations") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 ++ net/netfilter/nf_tables_api.c | 49 +++++++++++++++++++++++++++++-- net/netfilter/nft_set_rbtree.c | 43 +++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 0027beca5cd5..7dcea247f853 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -495,6 +495,9 @@ struct nft_set_ops { const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags); + u32 (*ksize)(u32 size); + u32 (*usize)(u32 size); + u32 (*adjust_maxsize)(const struct nft_set *set); void (*commit)(struct nft_set *set); void (*abort)(const struct nft_set *set); u64 (*privsize)(const struct nlattr * const nla[], diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 83f3face8bb3..de9c4335ef47 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4752,6 +4752,14 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, return 0; } +static u32 nft_set_userspace_size(const struct nft_set_ops *ops, u32 size) +{ + if (ops->usize) + return ops->usize(size); + + return size; +} + static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { @@ -4822,7 +4830,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, if (!nest) goto nla_put_failure; if (set->size && - nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size))) + nla_put_be32(skb, NFTA_SET_DESC_SIZE, + htonl(nft_set_userspace_size(set->ops, set->size)))) goto nla_put_failure; if (set->field_count > 1 && @@ -5190,6 +5199,15 @@ static bool nft_set_is_same(const struct nft_set *set, return true; } +static u32 nft_set_kernel_size(const struct nft_set_ops *ops, + const struct nft_set_desc *desc) +{ + if (ops->ksize) + return ops->ksize(desc->size); + + return desc->size; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -5372,6 +5390,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) return err; + if (desc.size) + desc.size = nft_set_kernel_size(set->ops, &desc); + err = 0; if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); @@ -5394,6 +5415,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (IS_ERR(ops)) return PTR_ERR(ops); + if (desc.size) + desc.size = nft_set_kernel_size(ops, &desc); + udlen = 0; if (nla[NFTA_SET_USERDATA]) udlen = nla_len(nla[NFTA_SET_USERDATA]); @@ -7050,6 +7074,27 @@ static bool nft_setelem_valid_key_end(const struct nft_set *set, return true; } +static u32 nft_set_maxsize(const struct nft_set *set) +{ + u32 maxsize, delta; + + if (!set->size) + return UINT_MAX; + + if (set->ops->adjust_maxsize) + delta = set->ops->adjust_maxsize(set); + else + delta = 0; + + if (check_add_overflow(set->size, set->ndeact, &maxsize)) + return UINT_MAX; + + if (check_add_overflow(maxsize, delta, &maxsize)) + return UINT_MAX; + + return maxsize; +} + static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { @@ -7422,7 +7467,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } if (!(flags & NFT_SET_ELEM_CATCHALL)) { - unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX; + unsigned int max = nft_set_maxsize(set); if (!atomic_add_unless(&set->nelems, 1, max)) { err = -ENFILE; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index b7ea21327549..2e8ef16ff191 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -750,6 +750,46 @@ static void nft_rbtree_gc_init(const struct nft_set *set) priv->last_gc = jiffies; } +/* rbtree stores ranges as singleton elements, each range is composed of two + * elements ... + */ +static u32 nft_rbtree_ksize(u32 size) +{ + return size * 2; +} + +/* ... hide this detail to userspace. */ +static u32 nft_rbtree_usize(u32 size) +{ + if (!size) + return 0; + + return size / 2; +} + +static u32 nft_rbtree_adjust_maxsize(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + struct nft_rbtree_elem *rbe; + struct rb_node *node; + const void *key; + + node = rb_last(&priv->root); + if (!node) + return 0; + + rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (!nft_rbtree_interval_end(rbe)) + return 0; + + key = nft_set_ext_key(&rbe->ext); + if (memchr(key, 1, set->klen)) + return 0; + + /* this is the all-zero no-match element. */ + return 1; +} + const struct nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -768,5 +808,8 @@ const struct nft_set_type nft_set_rbtree_type = { .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .get = nft_rbtree_get, + .ksize = nft_rbtree_ksize, + .usize = nft_rbtree_usize, + .adjust_maxsize = nft_rbtree_adjust_maxsize, }, }; From patchwork Thu Jan 16 17:18:50 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942056 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 97F621917F1; Thu, 16 Jan 2025 17:19:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047962; cv=none; b=d3vT+Bm3N9Sa0beNr8gDK2lTTmapTBSdjyu8w2Enj8MJw70O5I2Jk/5AmEyDZ0N5k+fhyk6NCWWO3CGwJbCygXjUG125kHO9iZp7TzjxHRskPqgNwBuLAS7KtfFR/1b3r3vZ/9jMSVhv+m46ZRmSL6+0tggK0713bC0phF4JWIs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047962; c=relaxed/simple; bh=sD4stBf1DJAqhPgon8HexB2qc/HARSwB2akVvF2B5O0=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=RWfjo+q+Hst1wOGq3IWNk2HrfYDMpRI1V3pcqUkgtG5x1Yw38QchwaPHlOnDdcilnvtvG2AtS0rdAlp9n4YqP4Q8lH363t14SpAmxS4lEC0qniqS5Y8JvH2Xbq8ec9oCpOPZnwOW5Q9UGdmvIihZwvC5zyPhmwWfRzUFioWrQjg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 02/14] netfilter: br_netfilter: remove unused conditional and dead code Date: Thu, 16 Jan 2025 18:18:50 +0100 Message-Id: <20250116171902.1783620-3-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Antoine Tenart The SKB_DROP_REASON_IP_INADDRERRORS drop reason is never returned from any function, as such it cannot be returned from the ip_route_input call tree. The 'reason != SKB_DROP_REASON_IP_INADDRERRORS' conditional is thus always true. Looking back at history, commit 50038bf38e65 ("net: ip: make ip_route_input() return drop reasons") changed the ip_route_input returned value check in br_nf_pre_routing_finish from -EHOSTUNREACH to SKB_DROP_REASON_IP_INADDRERRORS. It turns out -EHOSTUNREACH could not be returned either from the ip_route_input call tree and this since commit 251da4130115 ("ipv4: Cache ip_error() routes even when not forwarding."). Not a fix as this won't change the behavior. While at it use kfree_skb_reason. Signed-off-by: Antoine Tenart Reviewed-by: Simon Horman --- net/bridge/br_netfilter_hooks.c | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 451e45b9a6a5..94cbe967d1c1 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -393,38 +393,10 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ reason = ip_route_input(skb, iph->daddr, iph->saddr, ip4h_dscp(iph), dev); if (reason) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - - /* If err equals -EHOSTUNREACH the error is due to a - * martian destination or due to the fact that - * forwarding is disabled. For most martian packets, - * ip_route_output_key() will fail. It won't fail for 2 types of - * martian destinations: loopback destinations and destination - * 0.0.0.0. In both cases the packet will be dropped because the - * destination is the loopback device and not the bridge. */ - if (reason != SKB_DROP_REASON_IP_INADDRERRORS || !in_dev || - IN_DEV_FORWARD(in_dev)) - goto free_skb; - - rt = ip_route_output(net, iph->daddr, 0, - ip4h_dscp(iph), 0, - RT_SCOPE_UNIVERSE); - if (!IS_ERR(rt)) { - /* - Bridged-and-DNAT'ed traffic doesn't - * require ip_forwarding. */ - if (rt->dst.dev == dev) { - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - goto bridged_dnat; - } - ip_rt_put(rt); - } -free_skb: - kfree_skb(skb); + kfree_skb_reason(skb, reason); return 0; } else { if (skb_dst(skb)->dev == dev) { -bridged_dnat: skb->dev = br_indev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); From patchwork Thu Jan 16 17:18:51 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942058 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 97F06153808; Thu, 16 Jan 2025 17:19:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047963; cv=none; b=fCYTJMPptQwKWwBWwMg2wNgItBgJe+nJKJllQdQLEVBk6O3fh/hs+g8pyK1NHb7/KeG9Ul5ODX4qB34Av7MCPHEsmFYduj+zl2Fon3e8ohMlEOsrWvob0SGrTuBru/4aWxl4WILi7I+KGOf4rOJwbaCuGVZ6P2ooVRjlYaSETtI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047963; c=relaxed/simple; bh=YCUIgTXnZXL17oryx4O46qWOfHe2x1XhubsaVrXO574=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=XEqBqg7s4Sc7wacUOHueVCZzg9WoGiSKOmcUbH3uBqAbZqXaJ7YgN13n76IYMCSDBHaJ0e6rqnhSo4qtkKf9rMYHY4aGE7PmHLYRJ1G2dm2PFxWPQ0DgBmoKnZ6hzHpdtG0AUns9m6FrB7akZ7nJrbHC+cZzj64urPHcZxTrWqM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 03/14] netfilter: nf_tables: Flowtable hook's pf value never varies Date: Thu, 16 Jan 2025 18:18:51 +0100 Message-Id: <20250116171902.1783620-4-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter When checking for duplicate hooks in nft_register_flowtable_net_hooks(), comparing ops.pf value is pointless as it is always NFPROTO_NETDEV with flowtable hooks. Dropping the check leaves the search identical to the one in nft_hook_list_find() so call that function instead of open coding. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index de9c4335ef47..e41c77e5eefd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8895,7 +8895,7 @@ static int nft_register_flowtable_net_hooks(struct net *net, struct list_head *hook_list, struct nft_flowtable *flowtable) { - struct nft_hook *hook, *hook2, *next; + struct nft_hook *hook, *next; struct nft_flowtable *ft; int err, i = 0; @@ -8904,12 +8904,9 @@ static int nft_register_flowtable_net_hooks(struct net *net, if (!nft_is_active_next(net, ft)) continue; - list_for_each_entry(hook2, &ft->hook_list, list) { - if (hook->ops.dev == hook2->ops.dev && - hook->ops.pf == hook2->ops.pf) { - err = -EEXIST; - goto err_unregister_net_hooks; - } + if (nft_hook_list_find(&ft->hook_list, hook)) { + err = -EEXIST; + goto err_unregister_net_hooks; } } From patchwork Thu Jan 16 17:18:52 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942057 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id D306A1547D8; Thu, 16 Jan 2025 17:19:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047963; cv=none; b=cmIlPtmXcotyFR4dJU9S15HLv+VKD48EbhxCkyZzFdr9BWhojRcdqAcVenghKwSp77PbBnDpo3ZGIUwG6C8lHFHHvkb0+Y7eHRpV7CwepG4LWtMnBQpcoX3yuYekPpujshTE9pfbFj1bwnLsXmzFWgKsNfKu+WJbAe+Evyfmd7g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047963; c=relaxed/simple; bh=qeu8DXk77WA52Oinxzb134Fm5lfLpbVOpYGurIMNy6Q=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Fed+ONhb7JjlqpYXIP6TT9mRjfCUcv2JrZOZT6kcn0VYINgOvY2qQBCK8L2i1cVcDyAU7csfImf7So6KfnvD1/ubRtsRFJSMqZIqMjl6KC87FxjiggzOaTl6oW1K1D0ZLyX/qeJPhGDocichv8UBG6w2e7LBMm1/AOwd2U/S32k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 04/14] netfilter: nf_tables: Store user-defined hook ifname Date: Thu, 16 Jan 2025 18:18:52 +0100 Message-Id: <20250116171902.1783620-5-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter Prepare for hooks with NULL ops.dev pointer (due to non-existent device) and store the interface name and length as specified by the user upon creation. No functional change intended. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 7dcea247f853..d1f274af5b70 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1198,6 +1198,8 @@ struct nft_hook { struct list_head list; struct nf_hook_ops ops; struct rcu_head rcu; + char ifname[IFNAMSIZ]; + u8 ifnamelen; }; /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e41c77e5eefd..95d8d33589b1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2276,7 +2276,6 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, const struct nlattr *attr) { struct net_device *dev; - char ifname[IFNAMSIZ]; struct nft_hook *hook; int err; @@ -2286,12 +2285,17 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, goto err_hook_alloc; } - nla_strscpy(ifname, attr, IFNAMSIZ); + err = nla_strscpy(hook->ifname, attr, IFNAMSIZ); + if (err < 0) + goto err_hook_dev; + + hook->ifnamelen = nla_len(attr); + /* nf_tables_netdev_event() is called under rtnl_mutex, this is * indirectly serializing all the other holders of the commit_mutex with * the rtnl_mutex. */ - dev = __dev_get_by_name(net, ifname); + dev = __dev_get_by_name(net, hook->ifname); if (!dev) { err = -ENOENT; goto err_hook_dev; From patchwork Thu Jan 16 17:18:53 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942065 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 607EB234964; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047972; cv=none; b=IQFDtehbSRkaathJZT1ic+gKCtM89kQDNTD43pmi/HB14zqFCv+DyM4S1sav86/z1S87Axuj22tkV/qDSs5AExabG/Np+TjGUNUQEfduhhDHOeVxHTViRfby+jhedP63s9yUZBVqhoFVDZ7QhFp3j06NAdFuiLyEi6OGpOM1/zI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047972; c=relaxed/simple; bh=Nkd46QEpYCHldISYDFEY+9AVgnHq3zTgxeiRXfhjR1E=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=rjxHrn4Kx+L/9/so56qyaveicEkGHOLCoe8TgD6lS3QcNwe1luvAD42+Q0VXopaBo5afPyzxsjBBjse0ewv/G4bwH/J9Z7JhzT84UmmddN1URvIAWLbSzGqqfjeTS/RG86+gC/aY1QBK25xB4nGLtRedHwDweHQ+XstzHZp8q1k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 05/14] netfilter: nf_tables: Use stored ifname in netdev hook dumps Date: Thu, 16 Jan 2025 18:18:53 +0100 Message-Id: <20250116171902.1783620-6-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter The stored ifname and ops.dev->name may deviate after creation due to interface name changes. Prefer the more deterministic stored name in dumps which also helps avoiding inadvertent changes to stored ruleset dumps. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 95d8d33589b1..87175cd1d39b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1956,15 +1956,16 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, if (!first) first = hook; - if (nla_put_string(skb, NFTA_DEVICE_NAME, - hook->ops.dev->name)) + if (nla_put(skb, NFTA_DEVICE_NAME, + hook->ifnamelen, hook->ifname)) goto nla_put_failure; n++; } nla_nest_end(skb, nest_devs); if (n == 1 && - nla_put_string(skb, NFTA_HOOK_DEV, first->ops.dev->name)) + nla_put(skb, NFTA_HOOK_DEV, + first->ifnamelen, first->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -9324,7 +9325,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net, list_for_each_entry_rcu(hook, hook_list, list, lockdep_commit_lock_is_held(net)) { - if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name)) + if (nla_put(skb, NFTA_DEVICE_NAME, + hook->ifnamelen, hook->ifname)) goto nla_put_failure; } nla_nest_end(skb, nest_devs); From patchwork Thu Jan 16 17:18:54 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942060 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 60839234981; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; cv=none; b=hmhAKyCpceqn7III/QeTWToT+DAFE1+Jvb3u+nYjH9NZo9XkEkkbq0cz/vULHF6oR3W/Eu4zeLewSSXFCGkTrrRuQwvEVv7R/noUFTaj0hcelM1EqtEl1+x06IRYnKrISRh0KF4yokLyHxusRMLZfzQczK/PbMi6C6CxcG7Kn6I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; c=relaxed/simple; bh=sOOiXELOLHGTDAG4yU5ZQHuv+9It1vE7voNAEewZqrM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=p9MdIBvMjPcUXii64b8MJrT8WnthlGiLiqQvguLMZGjv+d3eLcj9pulLvXeCV/cZqHCbmxPjNzK+iVMuR7nvAL4V0vFlTAKFL6XNT9OikwlF0OUwtcpy19yyudZ3q+nxqCPhNnU7fBZtePfIB4QQly4xU3eKB4uJ12ATtWPz/5Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 06/14] netfilter: nf_tables: Compare netdev hooks based on stored name Date: Thu, 16 Jan 2025 18:18:54 +0100 Message-Id: <20250116171902.1783620-7-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter The 1:1 relationship between nft_hook and nf_hook_ops is about to break, so choose the stored ifname to uniquely identify hooks. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 87175cd1d39b..ed15c52e3c65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2317,7 +2317,7 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list, struct nft_hook *hook; list_for_each_entry(hook, hook_list, list) { - if (this->ops.dev == hook->ops.dev) + if (!strcmp(hook->ifname, this->ifname)) return hook; } From patchwork Thu Jan 16 17:18:55 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942064 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 608B9234CE4; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; cv=none; b=mGkGp3PP/MLRgmDM936KmwDBzF6o3mNQg8tKhoU3ZjfodJF6lPven168yxiImh8TP64Dgbeo+xwwt/UT11xnFezK+ecEjrevx0cKxXxc971B6FVyb+pNuKycAIxLc7wvn6ci6tVwvCFWHu/b2kJvFHG5XPdct/qD2Aw/8IsxT10= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; c=relaxed/simple; bh=6/7yjt2BepUE4q73oLbuslXB3cVo7z5uZYuf6bXngTI=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=P20jKA3ocz7x8DFRdoeJEeQlGz6lbMqKUzJyQ7qJs8WfarkdmPgn6Ch6rq811jUeCvqlORMspoJUo/5BdCwJKkVODZHKol1W9OFyqf8OC9g20m/KeG2ZAHCADSklg3Zv6i18m9otjF8ayFEub7lyQ8mijw3yzZe6lqzmZQYq6UE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 07/14] netfilter: nf_tables: Tolerate chains with no remaining hooks Date: Thu, 16 Jan 2025 18:18:55 +0100 Message-Id: <20250116171902.1783620-8-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter Do not drop a netdev-family chain if the last interface it is registered for vanishes. Users dumping and storing the ruleset upon shutdown to restore it upon next boot may otherwise lose the chain and all contained rules. They will still lose the list of devices, a later patch will fix that. For now, this aligns the event handler's behaviour with that for flowtables. The controversal situation at netns exit should be no problem here: event handler will unregister the hooks, core nftables cleanup code will drop the chain itself. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 -- net/netfilter/nf_tables_api.c | 41 ------------------------------- net/netfilter/nft_chain_filter.c | 29 ++++++---------------- 3 files changed, 7 insertions(+), 65 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index d1f274af5b70..26a65d5a3123 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1235,8 +1235,6 @@ static inline bool nft_is_base_chain(const struct nft_chain *chain) return chain->flags & NFT_CHAIN_BASE; } -int __nft_release_basechain(struct nft_ctx *ctx); - unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); static inline bool nft_use_inc(u32 *use) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ed15c52e3c65..667459256e4c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -11741,47 +11741,6 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, } EXPORT_SYMBOL_GPL(nft_data_dump); -static void __nft_release_basechain_now(struct nft_ctx *ctx) -{ - struct nft_rule *rule, *nr; - - list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { - list_del(&rule->list); - nf_tables_rule_release(ctx, rule); - } - nf_tables_chain_destroy(ctx->chain); -} - -int __nft_release_basechain(struct nft_ctx *ctx) -{ - struct nft_rule *rule; - - if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) - return 0; - - nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain); - list_for_each_entry(rule, &ctx->chain->rules, list) - nft_use_dec(&ctx->chain->use); - - nft_chain_del(ctx->chain); - nft_use_dec(&ctx->table->use); - - if (!maybe_get_net(ctx->net)) { - __nft_release_basechain_now(ctx); - return 0; - } - - /* wait for ruleset dumps to complete. Owning chain is no longer in - * lists, so new dumps can't find any of these rules anymore. - */ - synchronize_rcu(); - - __nft_release_basechain_now(ctx); - put_net(ctx->net); - return 0; -} -EXPORT_SYMBOL_GPL(__nft_release_basechain); - static void __nft_release_hook(struct net *net, struct nft_table *table) { struct nft_flowtable *flowtable; diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 7010541fcca6..543f258b7c6b 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -322,34 +322,19 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, struct nft_ctx *ctx) { struct nft_base_chain *basechain = nft_base_chain(ctx->chain); - struct nft_hook *hook, *found = NULL; - int n = 0; + struct nft_hook *hook; list_for_each_entry(hook, &basechain->hook_list, list) { - if (hook->ops.dev == dev) - found = hook; - - n++; - } - if (!found) - return; + if (hook->ops.dev != dev) + continue; - if (n > 1) { if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) - nf_unregister_net_hook(ctx->net, &found->ops); + nf_unregister_net_hook(ctx->net, &hook->ops); - list_del_rcu(&found->list); - kfree_rcu(found, rcu); - return; + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); + break; } - - /* UNREGISTER events are also happening on netns exit. - * - * Although nf_tables core releases all tables/chains, only this event - * handler provides guarantee that hook->ops.dev is still accessible, - * so we cannot skip exiting net namespaces. - */ - __nft_release_basechain(ctx); } static int nf_tables_netdev_event(struct notifier_block *this, From patchwork Thu Jan 16 17:18:56 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942061 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 4E71D2343B6; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; cv=none; b=AE90Os0t1c1+fJN6y9UX9DnxOTejjpNQyAlc14KwuIrxbpm1NFg7ImJSRIjQRFhgUZ04ip+RQBz3R13lOTpm92MtWe1KoDAPlXI7gkZvjbruu4AZoWaRqzcNUwHui+dEuyZNntpPw2CmKJX4uTJzIBueb8jH3iYaOGKiSx8hahs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; c=relaxed/simple; bh=6ddK21dq6yMTvfYMZwaXs6sdIX5DTKQsflnlcsiwT3w=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=ZvKPzUYRVjOoY8yC1tmhU5W4LAg7KKY3bjNZOJcpszb3UwXzD9z2+2R2OFyK1xNIWrskfJuwvVgrON/8UQLsr19ycgpGawi9OCs1zHRg8SAsqc5YmKVMsMTRpvc70biV9g4af9m0q2hpvkWjbIedK3Y695kgddtA+2p6nn7I5l0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 08/14] netfilter: nf_tables: Simplify chain netdev notifier Date: Thu, 16 Jan 2025 18:18:56 +0100 Message-Id: <20250116171902.1783620-9-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Phil Sutter With conditional chain deletion gone, callback code simplifies: Instead of filling an nft_ctx object, just pass basechain to the per-chain function. Also plain list_for_each_entry() is safe now. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_chain_filter.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 543f258b7c6b..19a553550c76 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -319,17 +319,16 @@ static const struct nft_chain_type nft_chain_filter_netdev = { }; static void nft_netdev_event(unsigned long event, struct net_device *dev, - struct nft_ctx *ctx) + struct nft_base_chain *basechain) { - struct nft_base_chain *basechain = nft_base_chain(ctx->chain); struct nft_hook *hook; list_for_each_entry(hook, &basechain->hook_list, list) { if (hook->ops.dev != dev) continue; - if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT)) - nf_unregister_net_hook(ctx->net, &hook->ops); + if (!(basechain->chain.table->flags & NFT_TABLE_F_DORMANT)) + nf_unregister_net_hook(dev_net(dev), &hook->ops); list_del_rcu(&hook->list); kfree_rcu(hook, rcu); @@ -343,25 +342,20 @@ static int nf_tables_netdev_event(struct notifier_block *this, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct nft_base_chain *basechain; struct nftables_pernet *nft_net; - struct nft_chain *chain, *nr; + struct nft_chain *chain; struct nft_table *table; - struct nft_ctx ctx = { - .net = dev_net(dev), - }; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - nft_net = nft_pernet(ctx.net); + nft_net = nft_pernet(dev_net(dev)); mutex_lock(&nft_net->commit_mutex); list_for_each_entry(table, &nft_net->tables, list) { if (table->family != NFPROTO_NETDEV && table->family != NFPROTO_INET) continue; - ctx.family = table->family; - ctx.table = table; - list_for_each_entry_safe(chain, nr, &table->chains, list) { + list_for_each_entry(chain, &table->chains, list) { if (!nft_is_base_chain(chain)) continue; @@ -370,8 +364,7 @@ static int nf_tables_netdev_event(struct notifier_block *this, basechain->ops.hooknum != NF_INET_INGRESS) continue; - ctx.chain = chain; - nft_netdev_event(event, dev, &ctx); + nft_netdev_event(event, dev, basechain); } } mutex_unlock(&nft_net->commit_mutex); From patchwork Thu Jan 16 17:18:57 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942062 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 6076B2343BC; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; cv=none; b=kvyP6kxxckB5mTsEuNWNS6THm1o2ktOXTBilMXrTC+bcuTenBqDV1Qv0qTBOwyKjjOrTY4qnGmxGLTh5WnPPvIUjJNfjuciK+DJSdotqiNtF9aZ93OYEE5BntN5zogh3eqrqgKkGYGlA1aihyCoBh11S3FAJzwgkaFm75+/BSKQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; c=relaxed/simple; bh=bf4U92hBPyNGdkOqWr/Gm0rDjv1jB2B0iEc2B3V9aSI=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=cwmgoG+Kpa8ce5+flNnkBJonXUmUD6V1bo/18RVtHEBgGZF7zHx0OeAGDclSt9UqvpD9X5rPNYU6Jmr0XqZ6T9ukE0sai/Xyf/d6NyeJiTj72N96Efgmvn0BJHB3ZJogJAH9O+wjZWeYGsAAzT6/Eon7YzV5z/QciVilkjXMgU8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 09/14] netfilter: nft_flow_offload: clear tcp MAXACK flag before moving to slowpath Date: Thu, 16 Jan 2025 18:18:57 +0100 Message-Id: <20250116171902.1783620-10-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Florian Westphal This state reset is racy, no locks are held here. Since commit 8437a6209f76 ("netfilter: nft_flow_offload: set liberal tracking mode for tcp"), the window checks are disabled for normal data packets, but MAXACK flag is checked when validating TCP resets. Clear the flag so tcp reset validation checks are ignored. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index df72b0376970..bdde469bbbd1 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -161,10 +161,20 @@ void flow_offload_route_init(struct flow_offload *flow, } EXPORT_SYMBOL_GPL(flow_offload_route_init); -static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp) +static void flow_offload_fixup_tcp(struct nf_conn *ct) { + struct ip_ct_tcp *tcp = &ct->proto.tcp; + + spin_lock_bh(&ct->lock); + /* Conntrack state is outdated due to offload bypass. + * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks + * TCP reset validation will fail. + */ tcp->seen[0].td_maxwin = 0; + tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; tcp->seen[1].td_maxwin = 0; + tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; + spin_unlock_bh(&ct->lock); } static void flow_offload_fixup_ct(struct nf_conn *ct) @@ -176,7 +186,7 @@ static void flow_offload_fixup_ct(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - flow_offload_fixup_tcp(&ct->proto.tcp); + flow_offload_fixup_tcp(ct); timeout = tn->timeouts[ct->proto.tcp.state]; timeout -= tn->offload_timeout; From patchwork Thu Jan 16 17:18:58 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942063 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 4BD4422D4F7; Thu, 16 Jan 2025 17:19:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; cv=none; b=mTj9zq4kOFeOTNtTulWHMlGbhWydUzXLR+8CscoN1fnxyMwLYhb6CPIDPb84vkBvWyj/bBJKoMKNEZC5faQKgxdgD0czhAc+SLKikSgF8wgbhW0fzxPYxokzFnygcyk8rz4Fmth9x7Fs9h7V+iLr2LKZQgrT3vIs/MNr9zIOiWM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047971; c=relaxed/simple; bh=gNmgvJICOtLINI4dsmfOEC95rc76jXxRBTt+IehiqPc=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=pyRizDGf6GEuyOrMezeD1gWZYzncLg4IBiIKHIHkuMCNsiQ0kT6Y3MzSf01p8Euu6i5q27x2ee6e2RIfvUqBkGix2jXyUgBOLjAiprWhw1VRFxtjldglgjFRJI4+k5CnW+CozDQ7oBIuPjfL+Rn7VSuFzsEQrKWNwoFncLrorBU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 10/14] netfilter: nft_flow_offload: update tcp state flags under lock Date: Thu, 16 Jan 2025 18:18:58 +0100 Message-Id: <20250116171902.1783620-11-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Florian Westphal The conntrack entry is already public, there is a small chance that another CPU is handling a packet in reply direction and racing with the tcp state update. Move this under ct spinlock. This is done once, when ct is about to be offloaded, so this should not result in a noticeable performance hit. Fixes: 8437a6209f76 ("netfilter: nft_flow_offload: set liberal tracking mode for tcp") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 3b474d235663..221d50223018 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -289,6 +289,15 @@ static bool nft_flow_offload_skip(struct sk_buff *skb, int family) return false; } +static void flow_offload_ct_tcp(struct nf_conn *ct) +{ + /* conntrack will not see all packets, disable tcp window validation. */ + spin_lock_bh(&ct->lock); + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; + spin_unlock_bh(&ct->lock); +} + static void nft_flow_offload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -356,11 +365,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, goto err_flow_alloc; flow_offload_route_init(flow, &route); - - if (tcph) { - ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; - ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; - } + if (tcph) + flow_offload_ct_tcp(ct); __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags); ret = flow_offload_add(flowtable, flow); From patchwork Thu Jan 16 17:18:59 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942067 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 5E8CD2361CE; Thu, 16 Jan 2025 17:19:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; cv=none; b=gVSYze1ya4bWYfiG6wh8qAiaa+95IUuhMW91peh9edTDXPokLEdyIafzGBAb0Kk8496DbWH8nkz4tn1BhAt85HqWeO1YQrvvN3Nx4QOd3o1X9yA+Ue+CdOtXPOzXFqlw4a6etGvMbj1wbM5qEWZTJaMNwS9dqjus7AsqAWKy6rQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; c=relaxed/simple; bh=6OS/a7grl6gGo/eIMQF3exeLDzIEFYUlqzHlYfkjBQM=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=Oxj7jobN3En+3b7vGCesEQk33NNiyjCCMYRYPshETGspugjiMLcEjnOhkB6SW5BNY4LKhti9niYDA+3W3gH68rQIHUyVuYaLybTSegFF4svaPx411JTVuFOjduGlVdpK1n1mWquvuPt+0DX7AjsalQhnO90Jn6wOGYgsFjYXo78= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 11/14] netfilter: conntrack: remove skb argument from nf_ct_refresh Date: Thu, 16 Jan 2025 18:18:59 +0100 Message-Id: <20250116171902.1783620-12-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Florian Westphal Its not used (and could be NULL), so remove it. This allows to use nf_ct_refresh in places where we don't have an skb without having to double-check that skb == NULL would be safe. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 8 +++----- net/netfilter/nf_conntrack_amanda.c | 2 +- net/netfilter/nf_conntrack_broadcast.c | 2 +- net/netfilter/nf_conntrack_core.c | 7 +++---- net/netfilter/nf_conntrack_h323_main.c | 4 ++-- net/netfilter/nf_conntrack_sip.c | 4 ++-- net/netfilter/nft_ct.c | 2 +- 7 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cba3ccf03fcc..3cbf29dd0b71 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -204,8 +204,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple); void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - u32 extra_jiffies, bool do_acct); + u32 extra_jiffies, unsigned int bytes); /* Refresh conntrack for this many jiffies and do accounting */ static inline void nf_ct_refresh_acct(struct nf_conn *ct, @@ -213,15 +212,14 @@ static inline void nf_ct_refresh_acct(struct nf_conn *ct, const struct sk_buff *skb, u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true); + __nf_ct_refresh_acct(ct, ctinfo, extra_jiffies, skb->len); } /* Refresh conntrack for this many jiffies */ static inline void nf_ct_refresh(struct nf_conn *ct, - const struct sk_buff *skb, u32 extra_jiffies) { - __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false); + __nf_ct_refresh_acct(ct, 0, extra_jiffies, 0); } /* kill conntrack and do accounting */ diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index d011d2eb0848..7be4c35e4795 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -106,7 +106,7 @@ static int amanda_help(struct sk_buff *skb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - nf_ct_refresh(ct, skb, master_timeout * HZ); + nf_ct_refresh(ct, master_timeout * HZ); /* No data? */ dataoff = protoff + sizeof(struct udphdr); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index cfa0fe0356de..a7552a46d6ac 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -75,7 +75,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, nf_ct_expect_related(exp, 0); nf_ct_expect_put(exp); - nf_ct_refresh(ct, skb, timeout * HZ); + nf_ct_refresh(ct, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 456446d7af20..0149d482adaa 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2089,9 +2089,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_in); /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, u32 extra_jiffies, - bool do_acct) + unsigned int bytes) { /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) @@ -2104,8 +2103,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, if (READ_ONCE(ct->timeout) != extra_jiffies) WRITE_ONCE(ct->timeout, extra_jiffies); acct: - if (do_acct) - nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len); + if (bytes) + nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5a9bce24f3c3..14f73872f647 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1385,7 +1385,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, if (info->timeout > 0) { pr_debug("nf_ct_ras: set RAS connection timeout to " "%u seconds\n", info->timeout); - nf_ct_refresh(ct, skb, info->timeout * HZ); + nf_ct_refresh(ct, info->timeout * HZ); /* Set expect timeout */ spin_lock_bh(&nf_conntrack_expect_lock); @@ -1433,7 +1433,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - nf_ct_refresh(ct, skb, 30 * HZ); + nf_ct_refresh(ct, 30 * HZ); return 0; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d0eac27f6ba0..ca748f8dbff1 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1553,7 +1553,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NF_ACCEPT; - nf_ct_refresh(ct, skb, sip_timeout * HZ); + nf_ct_refresh(ct, sip_timeout * HZ); if (unlikely(skb_linearize(skb))) return NF_DROP; @@ -1624,7 +1624,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, if (dataoff >= skb->len) return NF_ACCEPT; - nf_ct_refresh(ct, skb, sip_timeout * HZ); + nf_ct_refresh(ct, sip_timeout * HZ); if (unlikely(skb_linearize(skb))) return NF_DROP; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 67a41cd2baaf..2e59aba681a1 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -929,7 +929,7 @@ static void nft_ct_timeout_obj_eval(struct nft_object *obj, */ values = nf_ct_timeout_data(timeout); if (values) - nf_ct_refresh(ct, pkt->skb, values[0]); + nf_ct_refresh(ct, values[0]); } static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, From patchwork Thu Jan 16 17:19:00 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942068 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 5E9482361D1; Thu, 16 Jan 2025 17:19:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; cv=none; b=Xhjv4BKEUi16FIWxHmNUpog4ubKJtkOR2TqBTZ68rkxbwliQZXSsYiwkIcTIY/oy36dXwQKpNAi60/+DrrkugmHTpbaQ5XqB4FhMEnn7X3nGJ+FmBFl6xwPbF9kiBX1QYhgxJOBS9yJHrCFfNlQfbXy0HEYGxOUKg27J6Zrhh2U= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; c=relaxed/simple; bh=J9P2GYdhkwlTmIwrXykDqbCEDGkpZxUHvtR0tBqWksA=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=NUjr8f0pMLvi+keW8qO4Ok1zWjAzjpWzh9BduWlFtmN8yJ8pN35x6hYaLsdF5aAZg68agGvZhBXJWGPXTaan7KHJrCxTrXSfpFZ78zi6cpJ0krm6sgj8rdV+6UyNe6dmHpaqXIveOAQVYxhuVJuQYe5t7YFp8NotP5gKrN9bhvI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 12/14] netfilter: conntrack: rework offload nf_conn timeout extension logic Date: Thu, 16 Jan 2025 18:19:00 +0100 Message-Id: <20250116171902.1783620-13-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org From: Florian Westphal Offload nf_conn entries may not see traffic for a very long time. To prevent incorrect 'ct is stale' checks during nf_conntrack table lookup, the gc worker extends the timeout nf_conn entries marked for offload to a large value. The existing logic suffers from a few problems. Garbage collection runs without locks, its unlikely but possible that @ct is removed right after the 'offload' bit test. In that case, the timeout of a new/reallocated nf_conn entry will be increased. Prevent this by obtaining a reference count on the ct object and re-check of the confirmed and offload bits. If those are not set, the ct is being removed, skip the timeout extension in this case. Parallel teardown is also problematic: cpu1 cpu2 gc_worker calls flow_offload_teardown() tests OFFLOAD bit, set clear OFFLOAD bit ct->timeout is repaired (e.g. set to timeout[UDP_CT_REPLIED]) nf_ct_offload_timeout() called expire value is fetched -> NF_CT_DAY timeout for flow that isn't offloaded (and might not see any further packets). Use cmpxchg: if ct->timeout was repaired after the 2nd 'offload bit' test passed, then ct->timeout will only be updated of ct->timeout was not altered in between. As we already have a gc worker for flowtable entries, ct->timeout repair can be handled from the flowtable gc worker. This avoids having flowtable specific logic in the conntrack core and avoids checking entries that were never offloaded. This allows to remove the nf_ct_offload_timeout helper. Its safe to use in the add case, but not on teardown. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 10 --- net/netfilter/nf_conntrack_core.c | 6 -- net/netfilter/nf_flow_table_core.c | 105 ++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 3cbf29dd0b71..3f02a45773e8 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -312,16 +312,6 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) #define NF_CT_DAY (86400 * HZ) -/* Set an arbitrary timeout large enough not to ever expire, this save - * us a check for the IPS_OFFLOAD_BIT from the packet path via - * nf_ct_is_expired(). - */ -static inline void nf_ct_offload_timeout(struct nf_conn *ct) -{ - if (nf_ct_expires(ct) < NF_CT_DAY / 2) - WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); -} - struct kernel_param; int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0149d482adaa..7f8b245e287a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,12 +1544,6 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); - if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { - nf_ct_offload_timeout(tmp); - if (!nf_conntrack_max95) - continue; - } - if (expired_count > GC_SCAN_EXPIRED_MAX) { rcu_read_unlock(); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index bdde469bbbd1..676d582ef7ab 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -304,7 +304,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) return err; } - nf_ct_offload_timeout(flow->ct); + nf_ct_refresh(flow->ct, NF_CT_DAY); if (nf_flowtable_hw_offload(flow_table)) { __set_bit(NF_FLOW_HW, &flow->flags); @@ -424,15 +424,116 @@ static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, return flow_table->type->gc && flow_table->type->gc(flow); } +/** + * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry + * @ct: Flowtable offloaded tcp ct + * + * Return number of seconds when ct entry should expire. + */ +static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct) +{ + u8 state = READ_ONCE(ct->proto.tcp.state); + + switch (state) { + case TCP_CONNTRACK_SYN_SENT: + case TCP_CONNTRACK_SYN_RECV: + return 0; + case TCP_CONNTRACK_ESTABLISHED: + return NF_CT_DAY; + case TCP_CONNTRACK_FIN_WAIT: + case TCP_CONNTRACK_CLOSE_WAIT: + case TCP_CONNTRACK_LAST_ACK: + case TCP_CONNTRACK_TIME_WAIT: + return 5 * 60 * HZ; + case TCP_CONNTRACK_CLOSE: + return 0; + } + + return 0; +} + +/** + * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry + * @ct: Flowtable offloaded ct + * + * Datapath lookups in the conntrack table will evict nf_conn entries + * if they have expired. + * + * Once nf_conn entries have been offloaded, nf_conntrack might not see any + * packets anymore. Thus ct->timeout is no longer refreshed and ct can + * be evicted. + * + * To avoid the need for an additional check on the offload bit for every + * packet processed via nf_conntrack_in(), set an arbitrary timeout large + * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT + * from the packet path via nf_ct_is_expired(). + */ +static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct) +{ + static const u32 min_timeout = 5 * 60 * HZ; + u32 expires = nf_ct_expires(ct); + + /* normal case: large enough timeout, nothing to do. */ + if (likely(expires >= min_timeout)) + return; + + /* must check offload bit after this, we do not hold any locks. + * flowtable and ct entries could have been removed on another CPU. + */ + if (!refcount_inc_not_zero(&ct->ct_general.use)) + return; + + /* load ct->status after refcount increase */ + smp_acquire__after_ctrl_dep(); + + if (nf_ct_is_confirmed(ct) && + test_bit(IPS_OFFLOAD_BIT, &ct->status)) { + u8 l4proto = nf_ct_protonum(ct); + u32 new_timeout = true; + + switch (l4proto) { + case IPPROTO_UDP: + new_timeout = NF_CT_DAY; + break; + case IPPROTO_TCP: + new_timeout = nf_flow_table_tcp_timeout(ct); + break; + default: + WARN_ON_ONCE(1); + break; + } + + /* Update to ct->timeout from nf_conntrack happens + * without holding ct->lock. + * + * Use cmpxchg to ensure timeout extension doesn't + * happen when we race with conntrack datapath. + * + * The inverse -- datapath updating ->timeout right + * after this -- is fine, datapath is authoritative. + */ + if (new_timeout) { + new_timeout += nfct_time_stamp; + cmpxchg(&ct->timeout, expires, new_timeout); + } + } + + nf_ct_put(ct); +} + static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { + bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags); + if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_custom_gc(flow_table, flow)) flow_offload_teardown(flow); + else if (!teardown) + nf_flow_table_extend_ct_timeout(flow->ct); - if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { + if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) nf_flow_offload_del(flow_table, flow); From patchwork Thu Jan 16 17:19:01 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942066 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 89E87236A89; Thu, 16 Jan 2025 17:19:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; cv=none; b=G9IGmuMzjWigDqA5F/PZMoOqeiC0JKW0no94EvVSoIphHC4PMZvS/rBY0G1oOBMyWsuNONIM5/y1Aq8L/LXmD+cBHs+QAncWHzzbh2XhB1KLcypxQDdXBFyL19GrlGoTEzERXY1dHVWDbmFTEYPqRbgX72uuw5DUiOyuCwC306Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; c=relaxed/simple; bh=TpOe8lEa+VWEybRdhUyswpd6EgPwBrCIeX+GSFaQbjU=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=FkMVPbxItOB9sKO9ve+RwqSz0uKwpLHf3Sr99Inny7u+DgTC7SBb9AhPIZWqtNUlC5ikvYVlZCxe8OrQq9p/ZVM2N5021gT5O0HCF9U7pcyCarvd4d7W4uPqHCxrWretLjBWeZTUJfJmUkGXOvDVkpOp47g8FECX5+KBYu9QuQk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 13/14] netfilter: flowtable: teardown flow if cached mtu is stale Date: Thu, 16 Jan 2025 18:19:01 +0100 Message-Id: <20250116171902.1783620-14-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org Tear down the flow entry in the unlikely case that the interface mtu changes, this gives the flow a chance to refresh the cached mtu, otherwise such refresh does not occur until flow entry expires. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 98edcaa37b38..a22856106383 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -377,8 +377,10 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { + flow_offload_teardown(flow); return 0; + } iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4) + ctx->offset; @@ -656,8 +658,10 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { + flow_offload_teardown(flow); return 0; + } ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); thoff = sizeof(*ip6h) + ctx->offset; From patchwork Thu Jan 16 17:19:02 2025 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Pablo Neira Ayuso X-Patchwork-Id: 13942069 X-Patchwork-Delegate: kuba@kernel.org Received: from mail.netfilter.org (unknown [217.70.188.207]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 8AF93236A8B; Thu, 16 Jan 2025 17:19:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=217.70.188.207 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; cv=none; b=c9vIDyjdBtMcjP9L1VEiRy6O8HnAw7O6q5Z94s7AGMl/+vPA+kjw35iNkRz9HEgRo/O/uknJQNeWklNhC3cxnBtmzXKWbTaqWTXWpOwhecUNznJtOydNRICWeMqTXIe6hlQS0hhvUIgcsWZrjECur1KSjVfgy3tI+dEhli5tq7M= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737047973; c=relaxed/simple; bh=//qsKqzpsYnllQ724iJOo4wNiTBxLefdqXKpWkxV8R4=; h=From:To:Cc:Subject:Date:Message-Id:In-Reply-To:References: MIME-Version; b=MjPXMh5dNw2dlgTJGx6Ii6K7yjn/DAVXCksRs6uvGV3nqDrLgeetYTInMQpwoldxHxdLp1vl3cOqKHNf0sKyyz5eJSaYh/Ps0EuZ1JhqLzK9oyV9qnqAxUeTFiKoHvdYWaPOamfVVNJ5wdZqVg78vmBGHOVrdm8qdtD/qis2phc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org; spf=pass smtp.mailfrom=netfilter.org; arc=none smtp.client-ip=217.70.188.207 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=netfilter.org Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=netfilter.org From: Pablo Neira Ayuso To: netfilter-devel@vger.kernel.org Cc: davem@davemloft.net, netdev@vger.kernel.org, kuba@kernel.org, pabeni@redhat.com, edumazet@google.com, fw@strlen.de Subject: [PATCH net-next 14/14] netfilter: flowtable: add CLOSING state Date: Thu, 16 Jan 2025 18:19:02 +0100 Message-Id: <20250116171902.1783620-15-pablo@netfilter.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20250116171902.1783620-1-pablo@netfilter.org> References: <20250116171902.1783620-1-pablo@netfilter.org> Precedence: bulk X-Mailing-List: netdev@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 X-Patchwork-Delegate: kuba@kernel.org tcp rst/fin packet triggers an immediate teardown of the flow which results in sending flows back to the classic forwarding path. This behaviour was introduced by: da5984e51063 ("netfilter: nf_flow_table: add support for sending flows back to the slow path") b6f27d322a0a ("netfilter: nf_flow_table: tear down TCP flows if RST or FIN was seen") whose goal is to expedite removal of flow entries from the hardware table. Before these patches, the flow was released after the flow entry timed out. However, this approach leads to packet races when restoring the conntrack state as well as late flow re-offload situations when the TCP connection is ending. This patch adds a new CLOSING state that is is entered when tcp rst/fin packet is seen. This allows for an early removal of the flow entry from the hardware table. But the flow entry still remains in software, so tcp packets to shut down the flow are not sent back to slow path. If syn packet is seen from this new CLOSING state, then this flow enters teardown state, ct state is set to TCP_CONNTRACK_CLOSE state and packet is sent to slow path, so this TCP reopen scenario can be handled by conntrack. TCP_CONNTRACK_CLOSE provides a small timeout that aims at quickly releasing this stale entry from the conntrack table. Moreover, skip hardware re-offload from flowtable software packet if the flow is in CLOSING state. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 + net/netfilter/nf_flow_table_core.c | 66 +++++++++++++++++++-------- net/netfilter/nf_flow_table_ip.c | 6 ++- 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index b63d53bb9dd6..d711642e78b5 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -163,6 +163,7 @@ struct flow_offload_tuple_rhash { enum nf_flow_flags { NF_FLOW_SNAT, NF_FLOW_DNAT, + NF_FLOW_CLOSING, NF_FLOW_TEARDOWN, NF_FLOW_HW, NF_FLOW_HW_DYING, diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 676d582ef7ab..659087655118 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -161,11 +161,19 @@ void flow_offload_route_init(struct flow_offload *flow, } EXPORT_SYMBOL_GPL(flow_offload_route_init); -static void flow_offload_fixup_tcp(struct nf_conn *ct) +static inline bool nf_flow_has_expired(const struct flow_offload *flow) +{ + return nf_flow_timeout_delta(flow->timeout) <= 0; +} + +static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state) { struct ip_ct_tcp *tcp = &ct->proto.tcp; spin_lock_bh(&ct->lock); + if (tcp->state != tcp_state) + tcp->state = tcp_state; + /* Conntrack state is outdated due to offload bypass. * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks * TCP reset validation will fail. @@ -177,36 +185,58 @@ static void flow_offload_fixup_tcp(struct nf_conn *ct) spin_unlock_bh(&ct->lock); } -static void flow_offload_fixup_ct(struct nf_conn *ct) +static void flow_offload_fixup_ct(struct flow_offload *flow) { + struct nf_conn *ct = flow->ct; struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); + bool expired, closing = false; + u32 offload_timeout = 0; s32 timeout; if (l4num == IPPROTO_TCP) { - struct nf_tcp_net *tn = nf_tcp_pernet(net); + const struct nf_tcp_net *tn = nf_tcp_pernet(net); + u8 tcp_state; - flow_offload_fixup_tcp(ct); + /* Enter CLOSE state if fin/rst packet has been seen, this + * allows TCP reopen from conntrack. Otherwise, pick up from + * the last seen TCP state. + */ + closing = test_bit(NF_FLOW_CLOSING, &flow->flags); + if (closing) { + flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE); + timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]); + expired = false; + } else { + tcp_state = READ_ONCE(ct->proto.tcp.state); + flow_offload_fixup_tcp(ct, tcp_state); + timeout = READ_ONCE(tn->timeouts[tcp_state]); + expired = nf_flow_has_expired(flow); + } + offload_timeout = READ_ONCE(tn->offload_timeout); - timeout = tn->timeouts[ct->proto.tcp.state]; - timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { - struct nf_udp_net *tn = nf_udp_pernet(net); + const struct nf_udp_net *tn = nf_udp_pernet(net); enum udp_conntrack state = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? UDP_CT_REPLIED : UDP_CT_UNREPLIED; - timeout = tn->timeouts[state]; - timeout -= tn->offload_timeout; + timeout = READ_ONCE(tn->timeouts[state]); + expired = nf_flow_has_expired(flow); + offload_timeout = READ_ONCE(tn->offload_timeout); } else { return; } + if (expired) + timeout -= offload_timeout; + if (timeout < 0) timeout = 0; - if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) - WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); + if (closing || + nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) + nf_ct_refresh(ct, timeout); } static void flow_offload_route_release(struct flow_offload *flow) @@ -326,18 +356,14 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, else return; - if (likely(!nf_flowtable_hw_offload(flow_table))) + if (likely(!nf_flowtable_hw_offload(flow_table)) || + test_bit(NF_FLOW_CLOSING, &flow->flags)) return; nf_flow_offload_add(flow_table, flow); } EXPORT_SYMBOL_GPL(flow_offload_refresh); -static inline bool nf_flow_has_expired(const struct flow_offload *flow) -{ - return nf_flow_timeout_delta(flow->timeout) <= 0; -} - static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { @@ -354,7 +380,7 @@ void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); set_bit(NF_FLOW_TEARDOWN, &flow->flags); - flow_offload_fixup_ct(flow->ct); + flow_offload_fixup_ct(flow); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -542,6 +568,10 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, } else { flow_offload_del(flow_table, flow); } + } else if (test_bit(NF_FLOW_CLOSING, &flow->flags) && + test_bit(NF_FLOW_HW, &flow->flags) && + !test_bit(NF_FLOW_HW_DYING, &flow->flags)) { + nf_flow_offload_del(flow_table, flow); } else if (test_bit(NF_FLOW_HW, &flow->flags)) { nf_flow_offload_stats(flow_table, flow); } diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index a22856106383..97c6eb8847a0 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -28,11 +28,15 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, return 0; tcph = (void *)(skb_network_header(skb) + thoff); - if (unlikely(tcph->fin || tcph->rst)) { + if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) { flow_offload_teardown(flow); return -1; } + if ((tcph->fin || tcph->rst) && + !test_bit(NF_FLOW_CLOSING, &flow->flags)) + set_bit(NF_FLOW_CLOSING, &flow->flags); + return 0; }