diff mbox series

[net-next,v2] ipv6: add IFLA_INET6_RA_MTU to expose mtu value in the RA message

Message ID 20210731015230.11589-1-rocco.yue@mediatek.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2] ipv6: add IFLA_INET6_RA_MTU to expose mtu value in the RA message | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 5 maintainers not CCed: thomas.karlsson@paneda.se matthias.bgg@gmail.com jonas@norrbonn.se laforge@gnumonks.org pbshelar@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 4765 this patch: 4765
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 4827 this patch: 4827
netdev/header_inline success Link

Commit Message

Rocco Yue July 31, 2021, 1:52 a.m. UTC
The kernel provides a "/proc/sys/net/ipv6/conf/<iface>/mtu"
file, which can temporarily record the mtu value of the last
received RA message when the RA mtu value is lower than the
interface mtu, but this proc has the following limitations:

(1) when the interface mtu (/sys/class/net/<iface>/mtu) is
updated, mtu6 (/proc/sys/net/ipv6/conf/<iface>/mtu) will
be updated to the value of the interface mtu;
(2) mtu6 (/proc/sys/net/ipv6/conf/<iface>/mtu) only affects
ipv6 connections, and does not affect ipv4.

Therefore, when the mtu option is carried in the RA message,
there will be a problem that the user sometimes cannot obtain
RA mtu value correctly by reading mtu6.

After this patch set, if an RA message carries the mtu option,
you can send a netlink msg whose nlmsg_type is RTM_GETLINK,
and then parse the IFLA_INET6_RA_MTU attribute to
get the mtu value carried in the RA message received on the
inet6 device.

In this way, if the MTU values that the device receives from
the network in the PCO IPv4 and the RA IPv6 procedures are
different, the user space process can read ra_mtu to get
the mtu value carried in the RA message without worrying
about the issue of ipv4 being stuck due to the late arrival
of RA message. After comparing the value of ra_mtu and ipv4
mtu, then the device can use the lower MTU value for both
IPv4 and IPv6.

Signed-off-by: Rocco Yue <rocco.yue@mediatek.com>
---
 include/net/if_inet6.h             | 2 ++
 include/uapi/linux/if_link.h       | 1 +
 net/ipv6/addrconf.c                | 5 +++++
 net/ipv6/ndisc.c                   | 5 +++++
 tools/include/uapi/linux/if_link.h | 1 +
 5 files changed, 14 insertions(+)

Comments

David Ahern July 31, 2021, 5:17 p.m. UTC | #1
On 7/30/21 7:52 PM, Rocco Yue wrote:
> The kernel provides a "/proc/sys/net/ipv6/conf/<iface>/mtu"
> file, which can temporarily record the mtu value of the last
> received RA message when the RA mtu value is lower than the
> interface mtu, but this proc has the following limitations:
> 
> (1) when the interface mtu (/sys/class/net/<iface>/mtu) is
> updated, mtu6 (/proc/sys/net/ipv6/conf/<iface>/mtu) will
> be updated to the value of the interface mtu;
> (2) mtu6 (/proc/sys/net/ipv6/conf/<iface>/mtu) only affects
> ipv6 connections, and does not affect ipv4.
> 
> Therefore, when the mtu option is carried in the RA message,
> there will be a problem that the user sometimes cannot obtain
> RA mtu value correctly by reading mtu6.
> 
> After this patch set, if a RA message carries the mtu option,
> you can send a netlink msg which nlmsg_type is RTM_GETLINK,
> and then by parsing the attribute of IFLA_INET6_RA_MTU to
> get the mtu value carried in the RA message received on the
> inet6 device.
> 
> In this way, if the MTU values that the device receives from
> the network in the PCO IPv4 and the RA IPv6 procedures are
> different, the user space process can read ra_mtu to get
> the mtu value carried in the RA message without worrying
> about the issue of ipv4 being stuck due to the late arrival
> of RA message. After comparing the value of ra_mtu and ipv4
> mtu, then the device can use the lower MTU value for both
> IPv4 and IPv6.

you are storing the value and sending it to userspace but never using it
when sending a message. What's the point of processing the MTU in the
RA if you are not going to use it to control message size?

> 
> Signed-off-by: Rocco Yue <rocco.yue@mediatek.com>
> ---
>  include/net/if_inet6.h             | 2 ++
>  include/uapi/linux/if_link.h       | 1 +
>  net/ipv6/addrconf.c                | 5 +++++
>  net/ipv6/ndisc.c                   | 5 +++++
>  tools/include/uapi/linux/if_link.h | 1 +
>  5 files changed, 14 insertions(+)
> 
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 4882e81514b6..fcd1ae29f154 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -417,6 +417,7 @@ enum {
>  	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
>  	IFLA_INET6_TOKEN,	/* device token			*/
>  	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
> +	IFLA_INET6_RA_MTU,	/* mtu carried in the RA message  */
>  	__IFLA_INET6_MAX
>  };
>  
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 3bf685fe64b9..98eeaba9f86c 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -5537,6 +5537,7 @@ static inline size_t inet6_ifla6_size(void)
>  	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
>  	     + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
>  	     + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
> +	     + nla_total_size(4) /* IFLA_INET6_RA_MTU */
>  	     + 0;
>  }
>  
> @@ -5645,6 +5646,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
>  	if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
>  		goto nla_put_failure;
>  
> +	if (nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
> +		goto nla_put_failure;
> +
>  	return 0;
>  
>  nla_put_failure:
> @@ -5761,6 +5765,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
>  static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
>  	[IFLA_INET6_ADDR_GEN_MODE]	= { .type = NLA_U8 },
>  	[IFLA_INET6_TOKEN]		= { .len = sizeof(struct in6_addr) },
> +	[IFLA_INET6_RA_MTU]		= { .type = NLA_U32 },
>  };
>  
>  static int check_addr_gen_mode(int mode)

Its value is derived from an RA not set by userspace, so set the type to
NLA_REJECT so that inet6_validate_link_af will reject messages that have
IFLA_INET6_RA_MTU set. You can set "reject_message" in the policy to
return a message that "IFLA_INET6_RA_MTU can not be set".
Rocco Yue Aug. 2, 2021, 3:19 a.m. UTC | #2
On Sat, 2021-07-31 at 11:17 -0600, David Ahern wrote:
On 7/30/21 7:52 PM, Rocco Yue wrote:
>> In this way, if the MTU values that the device receives from
>> the network in the PCO IPv4 and the RA IPv6 procedures are
>> different, the user space process can read ra_mtu to get
>> the mtu value carried in the RA message without worrying
>> about the issue of ipv4 being stuck due to the late arrival
>> of RA message. After comparing the value of ra_mtu and ipv4
>> mtu, then the device can use the lower MTU value for both
>> IPv4 and IPv6.
> 
> you are storing the value and sending it to userspace but never using it
> when sending a message. What's the point of processing the MTU in the
> RA if you are not going to use it to control message size?

Hi David,

In the 2021 requirements of the mobile operator AT&T:
AT&T <CDR-CDS-116> Prioritize Lower MTU value:
If the MTU values that the device receives from the network in the PCO
IPv4 <CDR-CDS-110> and the RA IPv6 <CDR-CDS-112> procedures are different,
then the device shall use the lower MTU value for both IPv4 and IPv6.

And in the 3GPP 23.060:
The PDP PDUs shall be routed and transferred between the MS and the GGSN
or P-GW as N-PDUs. In order to avoid IP layer fragmentation between the
MS and the GGSN or P-GW, the link MTU size in the MS should be set to the
value provided by the network as a part of the IP configuration. This
applies to both IPv6 and IPv4.

That means the user needs to be able to correctly read the mtu value carried
in the RA message so that the user can correctly compare the PCO ipv4 mtu and
the RA ipv6 mtu.

>> @@ -5761,6 +5765,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
>>  static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
>>  	[IFLA_INET6_ADDR_GEN_MODE]	= { .type = NLA_U8 },
>>  	[IFLA_INET6_TOKEN]		= { .len = sizeof(struct in6_addr) },
>> +	[IFLA_INET6_RA_MTU]		= { .type = NLA_U32 },
>>  };
>>  
>>  static int check_addr_gen_mode(int mode)
> 
> Its value is derived from an RA not set by userspace, so set the type to
> NLA_REJECT so that inet6_validate_link_af will reject messages that have
> IFLA_INET6_RA_MTU set. You can set "reject_message" in the policy to
> return a message that "IFLA_INET6_RA_MTU can not be set".

will do.

Thanks
Rocco
Rocco Yue Aug. 2, 2021, 12:40 p.m. UTC | #3
On Sat, 2021-07-31 at 11:17 -0600, David Ahern wrote:
On 7/30/21 7:52 PM, Rocco Yue wrote:

> IFLA_INET6_RA_MTU set. You can set "reject_message" in the policy to
> return a message that "IFLA_INET6_RA_MTU can not be set".

Hi David,

Regarding setting "reject_message" in the policy, after reviewing
the code, I feel that it is unnecessary, because the cost of
implementing it seems to be a bit high, since it requires modifying
the function interface. The reasons are as follows:

The parameter "struct netlink_ext_ack *extack" is not exposed in the
function inet6_validate_link_af(), and the last argument when calling
nla_parse_nested_deprecated() is NULL, so user space is not
notified even if reject_message is set.

static int inet6_validate_link_af(...)
{
...
	err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
					  inet6_af_policy, NULL);
...
}


reject_message takes effect only when extack is not NULL.

static int validate_nla(...)
{
...
	switch (pt->type) {
	case NLA_REJECT:
		if (extack && pt->reject_message) {
			NL_SET_BAD_ATTR(extack, nla);
			extack->_msg = pt->reject_message;
			return -EINVAL;
		}
		err = -EINVAL;
		goto out_err;
...
}


Thanks
Rocco
David Ahern Aug. 2, 2021, 1:35 p.m. UTC | #4
On 8/2/21 6:40 AM, Rocco Yue wrote:
> On Sat, 2021-07-31 at 11:17 -0600, David Ahern wrote:
> On 7/30/21 7:52 PM, Rocco Yue wrote:
> 
>> IFLA_INET6_RA_MTU set. You can set "reject_message" in the policy to
>> return a message that "IFLA_INET6_RA_MTU can not be set".
> 
> Hi David,
> 
> Regarding setting "reject_message" in the policy, after reviewing
> the code, I feel that it is unnecessary, because the cost of
> implementing it seems to be a bit high, since it requires modifying
> the function interface. The reasons are as follows:

The policy can be set up now to do the right thing once the extack
argument is available.

do_setlink() has an extack argument. It calls validate_linkmsg which
calls validate_link_af meaning support can be added in a single patch.
If you decide to do it, then it should be a separate patch preceding
this one.
David Ahern Aug. 2, 2021, 1:37 p.m. UTC | #5
On 8/1/21 9:19 PM, Rocco Yue wrote:
> On Sat, 2021-07-31 at 11:17 -0600, David Ahern wrote:
> On 7/30/21 7:52 PM, Rocco Yue wrote:
>>> In this way, if the MTU values that the device receives from
>>> the network in the PCO IPv4 and the RA IPv6 procedures are
>>> different, the user space process can read ra_mtu to get
>>> the mtu value carried in the RA message without worrying
>>> about the issue of ipv4 being stuck due to the late arrival
>>> of RA message. After comparing the value of ra_mtu and ipv4
>>> mtu, then the device can use the lower MTU value for both
>>> IPv4 and IPv6.
>>
>> you are storing the value and sending it to userspace but never using it
>> when sending a message. What's the point of processing the MTU in the
>> RA if you are not going to use it to control message size?
> 
> Hi David,
> 
> In the requirement of mobile operator at&t in 2021:
> AT&T <CDR-CDS-116> Prioritize Lower MTU value:
> If the MTU values that the device receives from the network in the PCO
> IPv4 <CDR-CDS-110> and the RA IPv6 <CDR-CDS-112> procedures are different,
> then the device shall use the lower MTU value for both IPv4 and IPv6.
> 
> And in the 3GPP 23.060:
> The PDP PDUs shall be routed and transferred between the MS and the GGSN
> or P-GW as N-PDUs. In order to avoid IP layer fragmentation between the
> MS and the GGSN or P-GW, the link MTU size in the MS should be set to the
> value provided by the network as a part of the IP configuration. This
> applies to both IPv6 and IPv4.
> 
> That means user needs to be able to correctly read the mtu value carried
> in the RA message so that user can correctly compare PCO ipv4 mtu and
> RA ipv6 mtu.
> 

Then userspace should get a link notification when ra_mtu is set so it
does not have to poll.
Rocco Yue Aug. 3, 2021, 11:57 a.m. UTC | #6
On Mon, 2021-08-02 at 07:35 -0600, David Ahern wrote:
> On 8/2/21 6:40 AM, Rocco Yue wrote:
>> 
>> Regarding setting "reject_message" in the policy, after reviewing
>> the code, I feel that it is unnecessary, because the cost of
>> implementing it seems to be a bit high, since it requires modifying
>> the function interface. The reasons are as follows:
> 
> The policy can be setup now to do the right thing once the extack
> argument is available.
> 
> do_setlink() has an extack argument. It calls validate_linkmsg which
> calls validate_link_af meaning support can be added in a single patch.
> If you decide to do it, then it should be a separate patch preceding
> this one.
> 

Hi David,

Thanks for your advice.
I will first send a separate patch to add the extack argument.

> Then userspace should get a link notification when ra_mtu is set so it
> does not have to poll.

It makes sense, I will do it.

Thanks
Rocco
diff mbox series

Patch

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 71bb4cc4d05d..5cc260dead33 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -213,6 +213,8 @@  struct inet6_dev {
 
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
 	struct rcu_head		rcu;
+
+	unsigned int		ra_mtu;
 };
 
 static inline void ipv6_eth_mc_map(const struct in6_addr *addr, char *buf)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 4882e81514b6..fcd1ae29f154 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -417,6 +417,7 @@  enum {
 	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 	IFLA_INET6_TOKEN,	/* device token			*/
 	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+	IFLA_INET6_RA_MTU,	/* mtu carried in the RA message  */
 	__IFLA_INET6_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3bf685fe64b9..98eeaba9f86c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5537,6 +5537,7 @@  static inline size_t inet6_ifla6_size(void)
 	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
 	     + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
 	     + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+	     + nla_total_size(4) /* IFLA_INET6_RA_MTU */
 	     + 0;
 }
 
@@ -5645,6 +5646,9 @@  static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
 	if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
@@ -5761,6 +5765,7 @@  static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
 static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
 	[IFLA_INET6_ADDR_GEN_MODE]	= { .type = NLA_U8 },
 	[IFLA_INET6_TOKEN]		= { .len = sizeof(struct in6_addr) },
+	[IFLA_INET6_RA_MTU]		= { .type = NLA_U32 },
 };
 
 static int check_addr_gen_mode(int mode)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c467c6419893..9c6417a8e2b7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1496,6 +1496,11 @@  static void ndisc_router_discovery(struct sk_buff *skb)
 		memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
 		mtu = ntohl(n);
 
+		if (in6_dev->ra_mtu != mtu) {
+			in6_dev->ra_mtu = mtu;
+			ND_PRINTK(2, info, "update ra_mtu to %d\n", in6_dev->ra_mtu);
+		}
+
 		if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
 			ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
 		} else if (in6_dev->cnf.mtu6 != mtu) {
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index d208b2af697f..303085cdc271 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -230,6 +230,7 @@  enum {
 	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 	IFLA_INET6_TOKEN,	/* device token			*/
 	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+	IFLA_INET6_RA_MTU,	/* mtu carried in the RA message  */
 	__IFLA_INET6_MAX
 };