diff mbox series

[RESEND,net] ipv{4,6}/raw: fix output xfrm lookup wrt protocol

Message ID 20230516201542.9086-1-nicolas.dichtel@6wind.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [RESEND,net] ipv{4,6}/raw: fix output xfrm lookup wrt protocol | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4403 this patch: 4403
netdev/cc_maintainers warning 1 maintainers not CCed: dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 980 this patch: 980
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 4625 this patch: 4625
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 74 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nicolas Dichtel May 16, 2023, 8:15 p.m. UTC
With a raw socket bound to IPPROTO_RAW (i.e. with hdrincl enabled), the
protocol field of the flow structure, built by raw_sendmsg() /
rawv6_sendmsg(), is set to IPPROTO_RAW. This breaks the ipsec policy
lookup when some policies are defined with a protocol in the selector.

For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to
specify the protocol. Just accept all values for IPPROTO_RAW socket.

For ipv4, the sin_port field of 'struct sockaddr_in' could not be used
without breaking backward compatibility (the value of this field was never
checked). Let's add a new kind of control message, so that the userland
could specify which protocol is used.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
CC: stable@vger.kernel.org
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---

The first version has been marked 'Awaiting Upstream'. Steffen confirmed
that the 'net' tree should be the target, thus I resend this patch.
I also CC stable@vger.kernel.org.

 include/net/ip.h        |  2 ++
 include/uapi/linux/in.h |  1 +
 net/ipv4/ip_sockglue.c  | 15 ++++++++++++++-
 net/ipv4/raw.c          |  5 ++++-
 net/ipv6/raw.c          |  3 ++-
 5 files changed, 23 insertions(+), 3 deletions(-)

Comments

Paolo Abeni May 18, 2023, 10:01 a.m. UTC | #1
On Tue, 2023-05-16 at 22:15 +0200, Nicolas Dichtel wrote:
> With a raw socket bound to IPPROTO_RAW (ie with hdrincl enabled), the
> protocol field of the flow structure, build by raw_sendmsg() /
> rawv6_sendmsg()),  is set to IPPROTO_RAW. This breaks the ipsec policy
> lookup when some policies are defined with a protocol in the selector.
> 
> For ipv6, the sin6_port field from 'struct sockaddr_in6' could be used to
> specify the protocol. Just accept all values for IPPROTO_RAW socket.
> 
> For ipv4, the sin_port field of 'struct sockaddr_in' could not be used
> without breaking backward compatibility (the value of this field was never
> checked). Let's add a new kind of control message, so that the userland
> could specify which protocol is used.
> 
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> CC: stable@vger.kernel.org
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
> 
> The first version has been marked 'Awaiting Upstream'. Steffen confirmed
> that the 'net' tree should be the target, thus I resend this patch.
> I also CC stable@vger.kernel.org.
> 
>  include/net/ip.h        |  2 ++
>  include/uapi/linux/in.h |  1 +
>  net/ipv4/ip_sockglue.c  | 15 ++++++++++++++-
>  net/ipv4/raw.c          |  5 ++++-
>  net/ipv6/raw.c          |  3 ++-
>  5 files changed, 23 insertions(+), 3 deletions(-)
> 
> diff --git a/include/net/ip.h b/include/net/ip.h
> index c3fffaa92d6e..acec504c469a 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -76,6 +76,7 @@ struct ipcm_cookie {
>  	__be32			addr;
>  	int			oif;
>  	struct ip_options_rcu	*opt;
> +	__u8			protocol;
>  	__u8			ttl;
>  	__s16			tos;
>  	char			priority;
> @@ -96,6 +97,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
>  	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
>  	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
>  	ipcm->addr = inet->inet_saddr;
> +	ipcm->protocol = inet->inet_num;
>  }
>  
>  #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index 4b7f2df66b99..e682ab628dfa 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -163,6 +163,7 @@ struct in_addr {
>  #define IP_MULTICAST_ALL		49
>  #define IP_UNICAST_IF			50
>  #define IP_LOCAL_PORT_RANGE		51
> +#define IP_PROTOCOL			52
>  
>  #define MCAST_EXCLUDE	0
>  #define MCAST_INCLUDE	1
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index b511ff0adc0a..ec0fbe874426 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -317,7 +317,17 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
>  			ipc->tos = val;
>  			ipc->priority = rt_tos2priority(ipc->tos);
>  			break;
> -
> +		case IP_PROTOCOL:
> +			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
> +				val = *(int *)CMSG_DATA(cmsg);
> +			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
> +				val = *(u8 *)CMSG_DATA(cmsg);

AFAICS the 'dual' u8 support for IP_TOS was introduced to cope with
asymmetry WRT recvmsg(). Here we don't have (yet) the recvmsg
counterpart, and if/when that is added we can use the correct data type.

I think we are better off supporting only int, as e.g. IP_TTL does.

Side note, the above code could be factored out into a helper to be used
both for IP_PROTOCOL and IP_TTL (possibly in a net-next patch).

Thanks!

Paolo
diff mbox series

Patch

diff --git a/include/net/ip.h b/include/net/ip.h
index c3fffaa92d6e..acec504c469a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -76,6 +76,7 @@  struct ipcm_cookie {
 	__be32			addr;
 	int			oif;
 	struct ip_options_rcu	*opt;
+	__u8			protocol;
 	__u8			ttl;
 	__s16			tos;
 	char			priority;
@@ -96,6 +97,7 @@  static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
 	ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
 	ipcm->addr = inet->inet_saddr;
+	ipcm->protocol = inet->inet_num;
 }
 
 #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb))
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 4b7f2df66b99..e682ab628dfa 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -163,6 +163,7 @@  struct in_addr {
 #define IP_MULTICAST_ALL		49
 #define IP_UNICAST_IF			50
 #define IP_LOCAL_PORT_RANGE		51
+#define IP_PROTOCOL			52
 
 #define MCAST_EXCLUDE	0
 #define MCAST_INCLUDE	1
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b511ff0adc0a..ec0fbe874426 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -317,7 +317,17 @@  int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
 			ipc->tos = val;
 			ipc->priority = rt_tos2priority(ipc->tos);
 			break;
-
+		case IP_PROTOCOL:
+			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+				val = *(int *)CMSG_DATA(cmsg);
+			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+				val = *(u8 *)CMSG_DATA(cmsg);
+			else
+				return -EINVAL;
+			if (val < 1 || val > 255)
+				return -EINVAL;
+			ipc->protocol = val;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -1761,6 +1771,9 @@  int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_LOCAL_PORT_RANGE:
 		val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
 		break;
+	case IP_PROTOCOL:
+		val = inet_sk(sk)->inet_num;
+		break;
 	default:
 		sockopt_release_sock(sk);
 		return -ENOPROTOOPT;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff712bf2a98d..eadf1c9ef7e4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -532,6 +532,9 @@  static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 
 	ipcm_init_sk(&ipc, inet);
+	/* Keep backward compat */
+	if (hdrincl)
+		ipc.protocol = IPPROTO_RAW;
 
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -599,7 +602,7 @@  static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE,
-			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+			   hdrincl ? ipc.protocol : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
 			    (hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0, sk->sk_uid);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 7d0adb612bdd..44ee7a2e72ac 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -793,7 +793,8 @@  static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 		if (!proto)
 			proto = inet->inet_num;
-		else if (proto != inet->inet_num)
+		else if (proto != inet->inet_num &&
+			 inet->inet_num != IPPROTO_RAW)
 			return -EINVAL;
 
 		if (proto > 255)