diff mbox series

[net,v2,1/2] ipv4: Fix route lookups when handling ICMP redirects and PMTU updates

Message ID 8cbc1e6f2319dd50d4289bec6604174484ca615c.1647519748.git.gnault@redhat.com (mailing list archive)
State Accepted
Commit 544b4dd568e3b09c1ab38a759d3187e7abda11a0
Delegated to: Netdev Maintainers
Headers show
Series ipv4: Handle TOS and scope properly for ICMP redirects and PMTU updates | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 54 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Guillaume Nault March 17, 2022, 12:45 p.m. UTC
The PMTU update and ICMP redirect helper functions initialise their fl4
variable with either __build_flow_key() or build_sk_flow_key(). These
initialisation functions always set ->flowi4_scope with
RT_SCOPE_UNIVERSE and might set the ECN bits of ->flowi4_tos. This is
not a problem when the route lookup is later done via
ip_route_output_key_hash(), which properly clears the ECN bits from
->flowi4_tos and initialises ->flowi4_scope based on the RTO_ONLINK
flag. However, some helpers call fib_lookup() directly, without
sanitising the tos and scope fields, so the route lookup can fail and,
as a result, the ICMP redirect or PMTU update aren't taken into
account.

Fix this by extracting the ->flowi4_tos and ->flowi4_scope sanitisation
code into ip_rt_fix_tos(), then use this function in handlers that call
fib_lookup() directly.

Note 1: We can't sanitise ->flowi4_tos and ->flowi4_scope in a central
place (like __build_flow_key() or flowi4_init_output()), because
ip_route_output_key_hash() expects non-sanitised values. When called
with sanitised values, it can erroneously overwrite RT_SCOPE_LINK with
RT_SCOPE_UNIVERSE in ->flowi4_scope. Therefore we have to be careful to
sanitise the values only for those paths that don't call
ip_route_output_key_hash().

Note 2: The problem is mostly about sanitising ->flowi4_tos. Having
->flowi4_scope initialised with RT_SCOPE_UNIVERSE instead of
RT_SCOPE_LINK probably wasn't really a problem: sockets with the
SOCK_LOCALROUTE flag set (those that'd result in RTO_ONLINK being set)
normally shouldn't receive ICMP redirects or PMTU updates.

Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
---
 net/ipv4/route.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

Comments

David Ahern March 17, 2022, 6:40 p.m. UTC | #1
On 3/17/22 6:45 AM, Guillaume Nault wrote:
> The PMTU update and ICMP redirect helper functions initialise their fl4
> variable with either __build_flow_key() or build_sk_flow_key(). These
> initialisation functions always set ->flowi4_scope with
> RT_SCOPE_UNIVERSE and might set the ECN bits of ->flowi4_tos. This is
> not a problem when the route lookup is later done via
> ip_route_output_key_hash(), which properly clears the ECN bits from
> ->flowi4_tos and initialises ->flowi4_scope based on the RTO_ONLINK
> flag. However, some helpers call fib_lookup() directly, without
> sanitising the tos and scope fields, so the route lookup can fail and,
> as a result, the ICMP redirect or PMTU update aren't taken into
> account.
> 
> Fix this by extracting the ->flowi4_tos and ->flowi4_scope sanitisation
> code into ip_rt_fix_tos(), then use this function in handlers that call
> fib_lookup() directly.
> 
> Note 1: We can't sanitise ->flowi4_tos and ->flowi4_scope in a central
> place (like __build_flow_key() or flowi4_init_output()), because
> ip_route_output_key_hash() expects non-sanitised values. When called
> with sanitised values, it can erroneously overwrite RT_SCOPE_LINK with
> RT_SCOPE_UNIVERSE in ->flowi4_scope. Therefore we have to be careful to
> sanitise the values only for those paths that don't call
> ip_route_output_key_hash().
> 
> Note 2: The problem is mostly about sanitising ->flowi4_tos. Having
> ->flowi4_scope initialised with RT_SCOPE_UNIVERSE instead of
> RT_SCOPE_LINK probably wasn't really a problem: sockets with the
> SOCK_LOCALROUTE flag set (those that'd result in RTO_ONLINK being set)
> normally shouldn't receive ICMP redirects or PMTU updates.
> 
> Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.")
> Signed-off-by: Guillaume Nault <gnault@redhat.com>
> ---
>  net/ipv4/route.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>
diff mbox series

Patch

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f33ad1f383b6..d5d058de3664 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -499,6 +499,15 @@  void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
+static void ip_rt_fix_tos(struct flowi4 *fl4)
+{
+	__u8 tos = RT_FL_TOS(fl4);
+
+	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
+	fl4->flowi4_scope = tos & RTO_ONLINK ?
+			    RT_SCOPE_LINK : RT_SCOPE_UNIVERSE;
+}
+
 static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
 			     const struct sock *sk,
 			     const struct iphdr *iph,
@@ -824,6 +833,7 @@  static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
 	rt = (struct rtable *) dst;
 
 	__build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
+	ip_rt_fix_tos(&fl4);
 	__ip_do_redirect(rt, skb, &fl4, true);
 }
 
@@ -1048,6 +1058,7 @@  static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 	struct flowi4 fl4;
 
 	ip_rt_build_flow_key(&fl4, sk, skb);
+	ip_rt_fix_tos(&fl4);
 
 	/* Don't make lookup fail for bridged encapsulations */
 	if (skb && netif_is_any_bridge_port(skb->dev))
@@ -1122,6 +1133,8 @@  void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
 			goto out;
 
 		new = true;
+	} else {
+		ip_rt_fix_tos(&fl4);
 	}
 
 	__ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
@@ -2603,7 +2616,6 @@  static struct rtable *__mkroute_output(const struct fib_result *res,
 struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 					const struct sk_buff *skb)
 {
-	__u8 tos = RT_FL_TOS(fl4);
 	struct fib_result res = {
 		.type		= RTN_UNSPEC,
 		.fi		= NULL,
@@ -2613,9 +2625,7 @@  struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 	struct rtable *rth;
 
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
-	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
-	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
-			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
+	ip_rt_fix_tos(fl4);
 
 	rcu_read_lock();
 	rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);