diff mbox series

[net-next,6/6] icmp: add response to RFC 8335 PROBE messages

Message ID 403b12364707f6e579b91927799c505867336bb3.1607050389.git.andreas.a.roeseler@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series add support for RFC 8335 PROBE | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 3 this patch: 18
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: networking block comments don't use an empty /* line, use /* Comment...
netdev/build_allmodconfig_warn fail Errors and warnings before: 3 this patch: 18
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Andreas Roeseler Dec. 4, 2020, 3:17 a.m. UTC
Modify the icmp_rcv function to check for PROBE messages and call
icmp_echo if a PROBE request is detected.

Modify the existing icmp_echo function to respond to both ping and PROBE
requests.

This was tested using a custom modification of the iputils package and
Wireshark. It supports IPv4 probing by interface name, by ifindex, and by
either IPv4 or IPv6 address. It currently does not support probing interfaces
that reside off the proxy node (L-bit clear; see RFC 8335 Section 2).

Signed-off-by: Andreas Roeseler <andreas.a.roeseler@gmail.com>
---
 net/ipv4/icmp.c | 135 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 125 insertions(+), 10 deletions(-)

Comments

David Ahern Dec. 5, 2020, 5:44 a.m. UTC | #1
On 12/3/20 8:17 PM, Andreas Roeseler wrote:
> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
> index 005faea415a4..313061b60387 100644
> --- a/net/ipv4/icmp.c
> +++ b/net/ipv4/icmp.c
> @@ -984,20 +984,121 @@ static bool icmp_redirect(struct sk_buff *skb)
>  static bool icmp_echo(struct sk_buff *skb)
>  {
>  	struct net *net;
> +	struct icmp_bxm icmp_param;
> +	struct net_device *dev;
> +	struct net_device *target_dev;
> +	struct in_ifaddr *ifaddr;
> +	struct inet6_ifaddr *inet6_ifaddr;
> +	struct list_head *position;
> +	struct icmp_extobj_hdr *extobj_hdr;
> +	struct icmp_ext_ctype3_hdr *ctype3_hdr;
> +	__u8 status;

Networking coding style is reverse xmas tree for local variable declarations — i.e., ordered from longest line to shortest.


>  
>  	net = dev_net(skb_dst(skb)->dev);
> -	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
> -		struct icmp_bxm icmp_param;
> +	/* should there be an ICMP stat for ignored echos? */
> +	if (net->ipv4.sysctl_icmp_echo_ignore_all)
> +		return true;
> +
> +	icmp_param.data.icmph		= *icmp_hdr(skb);
> +	icmp_param.skb			= skb;
> +	icmp_param.offset		= 0;
> +	icmp_param.data_len		= skb->len;
> +	icmp_param.head_len		= sizeof(struct icmphdr);
>  
> -		icmp_param.data.icmph	   = *icmp_hdr(skb);
> +	if (icmp_param.data.icmph.type == ICMP_ECHO) {
>  		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
> -		icmp_param.skb		   = skb;
> -		icmp_param.offset	   = 0;
> -		icmp_param.data_len	   = skb->len;
> -		icmp_param.head_len	   = sizeof(struct icmphdr);
> -		icmp_reply(&icmp_param, skb);
> +		goto send_reply;
>  	}
> -	/* should there be an ICMP stat for ignored echos? */
> +	if (!net->ipv4.sysctl_icmp_echo_enable_probe)
> +		return true;
> +	/* We currently do not support probing off the proxy node */
> +	if ((ntohs(icmp_param.data.icmph.un.echo.sequence) & 1) == 0)
> +		return true;
> +
> +	icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
> +	icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
> +	extobj_hdr = (struct icmp_extobj_hdr *)(skb->data + sizeof(struct icmp_ext_hdr));
> +	ctype3_hdr = (struct icmp_ext_ctype3_hdr *)(extobj_hdr + 1);
> +	status = 0;
> +	target_dev = NULL;
> +	read_lock(&dev_base_lock);
> +	for_each_netdev(net, dev) {

for_each_netdev needs to be replaced by an appropriate lookup.


> +		switch (extobj_hdr->class_type) {
> +		case CTYPE_NAME:
> +			if (strcmp(dev->name, (char *)(extobj_hdr + 1)) == 0)
> +				goto found_matching_interface;
> +			break;
> +		case CTYPE_INDEX:
> +			if (ntohl(*((uint32_t *)(extobj_hdr + 1))) ==
> +				dev->ifindex)
> +				goto found_matching_interface;
> +			break;
> +		case CTYPE_ADDR:

1. In general, a name lookup is done by __dev_get_by_name /
dev_get_by_name_rcu / dev_get_by_name based on locking. rtnl is not held
in the datapath. Depending on need, you can hold the rcu lock
(rcu_read_lock) and use dev_get_by_name_rcu but you need to make sure
all references to the dev are used before calling rcu_read_unlock.

2. Similarly, lookup by index is done using __dev_get_by_index /
dev_get_by_index_rcu / dev_get_by_index.

3. Address to device lookup is done using something like __ip_dev_find
(IPv4) or ipv6_dev_find (IPv6) - again check the locking needs.


> +			switch (ntohs(ctype3_hdr->afi)) {
> +			/* IPV4 address */
> +			case 1:
> +				ifaddr = dev->ip_ptr->ifa_list;
> +				while (ifaddr) {
> +					if (memcmp(&ifaddr->ifa_address,
> +						   (ctype3_hdr + 1),
> +						   sizeof(ifaddr->ifa_address)) == 0)
> +						goto found_matching_interface;
> +					ifaddr = ifaddr->ifa_next;
> +				}
> +				break;
> +			/* IPV6 address */
> +			case 2:

No magic numbers - if AFI enums do not exist, add them.
diff mbox series

Patch

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 005faea415a4..313061b60387 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -984,20 +984,121 @@  static bool icmp_redirect(struct sk_buff *skb)
 static bool icmp_echo(struct sk_buff *skb)
 {
 	struct net *net;
+	struct icmp_bxm icmp_param;
+	struct net_device *dev;
+	struct net_device *target_dev;
+	struct in_ifaddr *ifaddr;
+	struct inet6_ifaddr *inet6_ifaddr;
+	struct list_head *position;
+	struct icmp_extobj_hdr *extobj_hdr;
+	struct icmp_ext_ctype3_hdr *ctype3_hdr;
+	__u8 status;
 
 	net = dev_net(skb_dst(skb)->dev);
-	if (!net->ipv4.sysctl_icmp_echo_ignore_all) {
-		struct icmp_bxm icmp_param;
+	/* should there be an ICMP stat for ignored echos? */
+	if (net->ipv4.sysctl_icmp_echo_ignore_all)
+		return true;
+
+	icmp_param.data.icmph		= *icmp_hdr(skb);
+	icmp_param.skb			= skb;
+	icmp_param.offset		= 0;
+	icmp_param.data_len		= skb->len;
+	icmp_param.head_len		= sizeof(struct icmphdr);
 
-		icmp_param.data.icmph	   = *icmp_hdr(skb);
+	if (icmp_param.data.icmph.type == ICMP_ECHO) {
 		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
-		icmp_param.skb		   = skb;
-		icmp_param.offset	   = 0;
-		icmp_param.data_len	   = skb->len;
-		icmp_param.head_len	   = sizeof(struct icmphdr);
-		icmp_reply(&icmp_param, skb);
+		goto send_reply;
 	}
-	/* should there be an ICMP stat for ignored echos? */
+	if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+		return true;
+	/* We currently do not support probing off the proxy node */
+	if ((ntohs(icmp_param.data.icmph.un.echo.sequence) & 1) == 0)
+		return true;
+
+	icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
+	icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
+	extobj_hdr = (struct icmp_extobj_hdr *)(skb->data + sizeof(struct icmp_ext_hdr));
+	ctype3_hdr = (struct icmp_ext_ctype3_hdr *)(extobj_hdr + 1);
+	status = 0;
+	target_dev = NULL;
+	read_lock(&dev_base_lock);
+	for_each_netdev(net, dev) {
+		switch (extobj_hdr->class_type) {
+		case CTYPE_NAME:
+			if (strcmp(dev->name, (char *)(extobj_hdr + 1)) == 0)
+				goto found_matching_interface;
+			break;
+		case CTYPE_INDEX:
+			if (ntohl(*((uint32_t *)(extobj_hdr + 1))) ==
+				dev->ifindex)
+				goto found_matching_interface;
+			break;
+		case CTYPE_ADDR:
+			switch (ntohs(ctype3_hdr->afi)) {
+			/* IPV4 address */
+			case 1:
+				ifaddr = dev->ip_ptr->ifa_list;
+				while (ifaddr) {
+					if (memcmp(&ifaddr->ifa_address,
+						   (ctype3_hdr + 1),
+						   sizeof(ifaddr->ifa_address)) == 0)
+						goto found_matching_interface;
+					ifaddr = ifaddr->ifa_next;
+				}
+				break;
+			/* IPV6 address */
+			case 2:
+				list_for_each(position,
+					      &dev->ip6_ptr->addr_list) {
+					inet6_ifaddr = list_entry(position,
+								  struct inet6_ifaddr,
+								  if_list);
+					if (memcmp(&inet6_ifaddr->addr.in6_u,
+						   (ctype3_hdr + 1),
+						   sizeof(inet6_ifaddr->addr.in6_u)) == 0)
+						goto found_matching_interface;
+				}
+				break;
+			default:
+				icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY;
+				goto unlock_dev;
+			}
+			break;
+		default:
+			icmp_param.data.icmph.code = ICMP_EXT_MAL_QUERY;
+			goto unlock_dev;
+		}
+		continue;
+found_matching_interface:
+		if (target_dev) {
+			icmp_param.data.icmph.code = ICMP_EXT_MULT_IFS;
+			goto unlock_dev;
+		}
+		target_dev = dev;
+	}
+	if (!target_dev) {
+		icmp_param.data.icmph.code = ICMP_EXT_NO_IF;
+		goto unlock_dev;
+	}
+
+	/* RFC 8335, Section 3: the last 8 bits of the Extended Echo Reply
+	 * Message are laid out as follows:
+	 *	+-+-+-+-+-+-+-+-+
+	 *	|State|Res|A|4|6|
+	 *	+-+-+-+-+-+-+-+-+
+	 */
+	if (target_dev->flags & IFF_UP)
+		status |= EXT_ECHOREPLY_ACTIVE;
+	if (target_dev->ip_ptr->ifa_list)
+		status |= EXT_ECHOREPLY_IPV4;
+	if (!list_empty(&target_dev->ip6_ptr->addr_list))
+		status |= EXT_ECHOREPLY_IPV6;
+
+	icmp_param.data.icmph.un.echo.sequence |= htons(status);
+unlock_dev:
+	read_unlock(&dev_base_lock);
+send_reply:
+	icmp_reply(&icmp_param, skb);
 	return true;
 }
 
@@ -1087,6 +1188,13 @@  int icmp_rcv(struct sk_buff *skb)
 	icmph = icmp_hdr(skb);
 
 	ICMPMSGIN_INC_STATS(net, icmph->type);
+
+	/*
+	 *	Check for ICMP Extended Echo (PROBE) messages
+	 */
+	if (icmph->type == ICMP_EXT_ECHO || icmph->type == ICMPV6_EXT_ECHO_REQUEST)
+		goto probe;
+
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
 	 *
@@ -1096,7 +1204,6 @@  int icmp_rcv(struct sk_buff *skb)
 	if (icmph->type > NR_ICMP_TYPES)
 		goto error;
 
-
 	/*
 	 *	Parse the ICMP message
 	 */
@@ -1123,6 +1230,7 @@  int icmp_rcv(struct sk_buff *skb)
 
 	success = icmp_pointers[icmph->type].handler(skb);
 
+success_check:
 	if (success)  {
 		consume_skb(skb);
 		return NET_RX_SUCCESS;
@@ -1136,6 +1244,13 @@  int icmp_rcv(struct sk_buff *skb)
 error:
 	__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 	goto drop;
+probe:
+	/*
+	 * We can't use icmp_pointers[].handler() because the ICMP types of
+	 *   PROBE messages (42 and 160) are beyond NR_ICMP_TYPES
+	 */
+	success = icmp_echo(skb);
+	goto success_check;
 }
 
 static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)