diff mbox series

[net,1/2] ipv4: Fix incorrect TOS in route get reply

Message ID 20240715142354.3697987-2-idosch@nvidia.com (mailing list archive)
State Accepted
Commit 338bb57e4c2a1c2c6fc92f9c0bd35be7587adca7
Delegated to: Netdev Maintainers
Headers show
Series ipv4: Fix incorrect TOS in route get reply | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 850 this patch: 850
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 864 this patch: 864
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 2773 this patch: 2773
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 52 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-07-15--18-00 (tests: 696)

Commit Message

Ido Schimmel July 15, 2024, 2:23 p.m. UTC
The TOS value that is returned to user space in the route get reply is
the one with which the lookup was performed ('fl4->flowi4_tos'). This is
fine when the matched route is configured with a TOS as it would not
match if its TOS value did not match the one with which the lookup was
performed.

However, matching on TOS is only performed when the route's TOS is not
zero. It is therefore possible to have the kernel incorrectly return a
non-zero TOS:

 # ip link add name dummy1 up type dummy
 # ip address add 192.0.2.1/24 dev dummy1
 # ip route get 192.0.2.2 tos 0xfc
 192.0.2.2 tos 0x1c dev dummy1 src 192.0.2.1 uid 0
     cache

Fix by adding a DSCP field to the FIB result structure (inside an
existing 4 bytes hole), populating it in the route lookup and using it
when filling the route get reply.

Output after the patch:

 # ip link add name dummy1 up type dummy
 # ip address add 192.0.2.1/24 dev dummy1
 # ip route get 192.0.2.2 tos 0xfc
 192.0.2.2 dev dummy1 src 192.0.2.1 uid 0
     cache

Fixes: 1a00fee4ffb2 ("ipv4: Remove rt_key_{src,dst,tos} from struct rtable.")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 include/net/ip_fib.h |  1 +
 net/ipv4/fib_trie.c  |  1 +
 net/ipv4/route.c     | 14 +++++++-------
 3 files changed, 9 insertions(+), 7 deletions(-)

Comments

David Ahern July 15, 2024, 10:33 p.m. UTC | #1
On 7/15/24 7:23 AM, Ido Schimmel wrote:
> The TOS value that is returned to user space in the route get reply is
> the one with which the lookup was performed ('fl4->flowi4_tos'). This is
> fine when the matched route is configured with a TOS as it would not
> match if its TOS value did not match the one with which the lookup was
> performed.
> 
> However, matching on TOS is only performed when the route's TOS is not
> zero. It is therefore possible to have the kernel incorrectly return a
> non-zero TOS:
> 
>  # ip link add name dummy1 up type dummy
>  # ip address add 192.0.2.1/24 dev dummy1
>  # ip route get 192.0.2.2 tos 0xfc
>  192.0.2.2 tos 0x1c dev dummy1 src 192.0.2.1 uid 0
>      cache
> 
> Fix by adding a DSCP field to the FIB result structure (inside an
> existing 4 bytes hole), populating it in the route lookup and using it
> when filling the route get reply.
> 
> Output after the patch:
> 
>  # ip link add name dummy1 up type dummy
>  # ip address add 192.0.2.1/24 dev dummy1
>  # ip route get 192.0.2.2 tos 0xfc
>  192.0.2.2 dev dummy1 src 192.0.2.1 uid 0
>      cache
> 
> Fixes: 1a00fee4ffb2 ("ipv4: Remove rt_key_{src,dst,tos} from struct rtable.")
> Signed-off-by: Ido Schimmel <idosch@nvidia.com>
> ---
>  include/net/ip_fib.h |  1 +
>  net/ipv4/fib_trie.c  |  1 +
>  net/ipv4/route.c     | 14 +++++++-------
>  3 files changed, 9 insertions(+), 7 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>
Guillaume Nault July 16, 2024, 6:10 p.m. UTC | #2
On Mon, Jul 15, 2024 at 05:23:53PM +0300, Ido Schimmel wrote:
> The TOS value that is returned to user space in the route get reply is
> the one with which the lookup was performed ('fl4->flowi4_tos'). This is
> fine when the matched route is configured with a TOS as it would not
> match if its TOS value did not match the one with which the lookup was
> performed.
> 
> However, matching on TOS is only performed when the route's TOS is not
> zero. It is therefore possible to have the kernel incorrectly return a
> non-zero TOS:
> 
>  # ip link add name dummy1 up type dummy
>  # ip address add 192.0.2.1/24 dev dummy1
>  # ip route get 192.0.2.2 tos 0xfc
>  192.0.2.2 tos 0x1c dev dummy1 src 192.0.2.1 uid 0
>      cache
> 
> Fix by adding a DSCP field to the FIB result structure (inside an
> existing 4 bytes hole), populating it in the route lookup and using it
> when filling the route get reply.
> 
> Output after the patch:
> 
>  # ip link add name dummy1 up type dummy
>  # ip address add 192.0.2.1/24 dev dummy1
>  # ip route get 192.0.2.2 tos 0xfc
>  192.0.2.2 dev dummy1 src 192.0.2.1 uid 0
>      cache

Reviewed-by: Guillaume Nault <gnault@redhat.com>
diff mbox series

Patch

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 9b2f69ba5e49..c29639b4323f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -173,6 +173,7 @@  struct fib_result {
 	unsigned char		type;
 	unsigned char		scope;
 	u32			tclassid;
+	dscp_t			dscp;
 	struct fib_nh_common	*nhc;
 	struct fib_info		*fi;
 	struct fib_table	*table;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index f474106464d2..8f30e3f00b7f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1629,6 +1629,7 @@  int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 			res->nhc = nhc;
 			res->type = fa->fa_type;
 			res->scope = fi->fib_scope;
+			res->dscp = fa->fa_dscp;
 			res->fi = fi;
 			res->table = tb;
 			res->fa_head = &n->leaf;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b3073d1c8f8f..7790a8347461 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2868,9 +2868,9 @@  EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
 /* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
-			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
-			struct sk_buff *skb, u32 portid, u32 seq,
-			unsigned int flags)
+			struct rtable *rt, u32 table_id, dscp_t dscp,
+			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
+			u32 seq, unsigned int flags)
 {
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
@@ -2886,7 +2886,7 @@  static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 	r->rtm_family	 = AF_INET;
 	r->rtm_dst_len	= 32;
 	r->rtm_src_len	= 0;
-	r->rtm_tos	= fl4 ? fl4->flowi4_tos : 0;
+	r->rtm_tos	= inet_dscp_to_dsfield(dscp);
 	r->rtm_table	= table_id < 256 ? table_id : RT_TABLE_COMPAT;
 	if (nla_put_u32(skb, RTA_TABLE, table_id))
 		goto nla_put_failure;
@@ -3036,7 +3036,7 @@  static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 				goto next;
 
 			err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
-					   table_id, NULL, skb,
+					   table_id, 0, NULL, skb,
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, flags);
 			if (err)
@@ -3359,8 +3359,8 @@  static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
 	} else {
-		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
-				   NETLINK_CB(in_skb).portid,
+		err = rt_fill_info(net, dst, src, rt, table_id, res.dscp, &fl4,
+				   skb, NETLINK_CB(in_skb).portid,
 				   nlh->nlmsg_seq, 0);
 	}
 	if (err < 0)