diff mbox series

[ipsec-next,v10,07/16] xfrm: iptfs: add new iptfs xfrm mode impl

Message ID 20240824022054.3788149-8-chopps@chopps.org (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Add IP-TFS mode to xfrm | expand

Checks

Context Check Description
netdev/series_format fail Series does not have a cover letter; Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 4 maintainers not CCed: pabeni@redhat.com kuba@kernel.org edumazet@google.com herbert@gondor.apana.org.au
netdev/build_clang success Errors and warnings before: 17 this patch: 17
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 22 this patch: 22
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 0 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Christian Hopps Aug. 24, 2024, 2:20 a.m. UTC
From: Christian Hopps <chopps@labn.net>

Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347.

This utilizes the new xfrm_mode_cbs to implement demand-driven IP-TFS
functionality. This functionality can be used to increase bandwidth
utilization through small packet aggregation, as well as help solve PMTU
issues through its efficient use of fragmentation.

  Link: https://www.rfc-editor.org/rfc/rfc9347.txt

Multiple commits follow to build the functionality into xfrm_iptfs.c

Signed-off-by: Christian Hopps <chopps@labn.net>
---
 net/xfrm/Makefile     |   1 +
 net/xfrm/xfrm_iptfs.c | 210 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+)
 create mode 100644 net/xfrm/xfrm_iptfs.c

Comments

Antony Antony Aug. 29, 2024, 11:43 a.m. UTC | #1
On Fri, Aug 23, 2024 at 10:20:45PM -0400, Christian Hopps wrote:
> From: Christian Hopps <chopps@labn.net>
> 
> Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347.
> 
> This utilizes the new xfrm_mode_cbs to implement demand-driven IP-TFS
> functionality. This functionality can be used to increase bandwidth
> utilization through small packet aggregation, as well as help solve PMTU
> issues through it's efficient use of fragmentation.
> 
>   Link: https://www.rfc-editor.org/rfc/rfc9347.txt
> 
> Multiple commits follow to build the functionality into xfrm_iptfs.c
> 
> Signed-off-by: Christian Hopps <chopps@labn.net>
> ---
>  net/xfrm/Makefile     |   1 +
>  net/xfrm/xfrm_iptfs.c | 210 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 211 insertions(+)
>  create mode 100644 net/xfrm/xfrm_iptfs.c
> 
> diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
> index 512e0b2f8514..5a1787587cb3 100644
> --- a/net/xfrm/Makefile
> +++ b/net/xfrm/Makefile
> @@ -21,5 +21,6 @@ obj-$(CONFIG_XFRM_USER) += xfrm_user.o
>  obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
>  obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
>  obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
> +obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
>  obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
>  obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
> diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
> new file mode 100644
> index 000000000000..201406175d17
> --- /dev/null
> +++ b/net/xfrm/xfrm_iptfs.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* xfrm_iptfs: IPTFS encapsulation support
> + *
> + * April 21 2022, Christian Hopps <chopps@labn.net>
> + *
> + * Copyright (c) 2022, LabN Consulting, L.L.C.
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/icmpv6.h>
> +#include <net/gro.h>
> +#include <net/icmp.h>
> +#include <net/ip6_route.h>
> +#include <net/inet_ecn.h>
> +#include <net/xfrm.h>
> +
> +#include <crypto/aead.h>
> +
> +#include "xfrm_inout.h"
> +
> +/**
> + * struct xfrm_iptfs_config - configuration for the IPTFS tunnel.
> + * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery,
> + *	otherwise the user specified value.
> + */
> +struct xfrm_iptfs_config {
> +	u32 pkt_size;	    /* outer_packet_size or 0 */
> +};
> +
> +/**
> + * struct xfrm_iptfs_data - mode specific xfrm state.
> + * @cfg: IPTFS tunnel config.
> + * @x: owning SA (xfrm_state).
> + * @payload_mtu: max payload size.
> + */
> +struct xfrm_iptfs_data {
> +	struct xfrm_iptfs_config cfg;
> +
> +	/* Ingress User Input */
> +	struct xfrm_state *x;	    /* owning state */
> +	u32 payload_mtu;	    /* max payload size */
> +};
> +
> +/* ========================== */
> +/* State Management Functions */
> +/* ========================== */
> +
> +/**
> + * iptfs_get_inner_mtu() - return inner MTU with no fragmentation.
> + * @x: xfrm state.
> + * @outer_mtu: the outer mtu
> + */
> +static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
> +{
> +	struct crypto_aead *aead;
> +	u32 blksize;
> +
> +	aead = x->data;
> +	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
> +	return ((outer_mtu - x->props.header_len - crypto_aead_authsize(aead)) &
> +		~(blksize - 1)) - 2;
> +}
> +
> +/**
> + * iptfs_user_init() - initialize the SA with IPTFS options from netlink.
> + * @net: the net data
> + * @x: xfrm state
> + * @attrs: netlink attributes
> + * @extack: extack return data
> + *
> + * Return: 0 on success or a negative error code on failure
> + */
> +static int iptfs_user_init(struct net *net, struct xfrm_state *x,
> +			   struct nlattr **attrs,
> +			   struct netlink_ext_ack *extack)
> +{
> +	struct xfrm_iptfs_data *xtfs = x->mode_data;
> +	struct xfrm_iptfs_config *xc;
> +
> +	xc = &xtfs->cfg;
> +
> +	if (attrs[XFRMA_IPTFS_PKT_SIZE]) {
> +		xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]);
> +		if (!xc->pkt_size) {
> +			xtfs->payload_mtu = 0;
> +		} else if (xc->pkt_size > x->props.header_len) {
> +			xtfs->payload_mtu = xc->pkt_size - x->props.header_len;
> +		} else {
> +			NL_SET_ERR_MSG(extack,
> +				       "Packet size must be 0 or greater than IPTFS/ESP header length");
> +			return -EINVAL;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static unsigned int iptfs_sa_len(const struct xfrm_state *x)
> +{
> +	struct xfrm_iptfs_data *xtfs = x->mode_data;
> +	struct xfrm_iptfs_config *xc = &xtfs->cfg;
> +	unsigned int l = 0;
> +
> +	if (x->dir == XFRM_SA_DIR_OUT)
> +		l += nla_total_size(sizeof(xc->pkt_size));
> +
> +	return l;
> +}
> +
> +static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb)
> +{
> +	struct xfrm_iptfs_data *xtfs = x->mode_data;
> +	struct xfrm_iptfs_config *xc = &xtfs->cfg;
> +	int ret = 0;
> +
> +	if (x->dir == XFRM_SA_DIR_OUT)
> +		ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size);
> +
> +	return ret;
> +}
> +
> +static void __iptfs_init_state(struct xfrm_state *x,
> +			       struct xfrm_iptfs_data *xtfs)
> +{
> +	/* Modify type (esp) adjustment values */
> +
> +	if (x->props.family == AF_INET)
> +		x->props.header_len += sizeof(struct iphdr) + sizeof(struct ip_iptfs_hdr);
> +	else if (x->props.family == AF_INET6)
> +		x->props.header_len += sizeof(struct ipv6hdr) + sizeof(struct ip_iptfs_hdr);
> +	x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
> +
> +	/* Always keep a module reference when x->mode_data is set */
> +	__module_get(x->mode_cbs->owner);
> +
> +	x->mode_data = xtfs;
> +	xtfs->x = x;
> +}
> +
> +static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig)
> +{
> +	struct xfrm_iptfs_data *xtfs;
> +
> +	xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
> +	if (!xtfs)
> +		return -ENOMEM;
> +
> +	__iptfs_init_state(x, xtfs);

I noticed __iptfs_init_state() is called twice during XFRM_MSG_MIGRATE.
This call, the first one, does the right thing. However, the second call
resets the iptfs values to zero.

While testing I noticed that clone is not working as expected. It seems to
reset the iptfs values. See the "ip x s" output before and after clone.

Here are two "ip x s" outputs, one before clone and another after clone. Notice
that the iptfs values are 0 after clone, while before it max-queue-size was 10485760.

root@east:/testing/pluto/ikev2-mobike-01$ip x s
src 192.1.2.23 dst 192.1.3.33
	proto esp spi 0xcd561999 reqid 16393 mode iptfs
	replay-window 0 flag af-unspec esn
	auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
	enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
	lastused 2024-08-29 12:33:12
	anti-replay esn context:
	 seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
	 replay_window 0, bitmap-length 0
	dir out
	iptfs-opts dont-frag init-delay 0 max-queue-size 10485760 pkt-size 0
src 192.1.3.33 dst 192.1.2.23
	proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
	replay-window 0 flag af-unspec esn
	auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
	enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
	lastused 2024-08-29 12:33:12
	anti-replay esn context:
	 seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
	 replay_window 128, bitmap-length 4
	 00000000 00000000 00000000 000007ff
	dir in
	iptfs-opts drop-time 3 reorder-window 3

After migrate: note that the iptfs values are 0.

root@east:/testing/pluto/ikev2-mobike-01$ip x s
src 192.1.8.22 dst 192.1.2.23
	proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
	replay-window 0 flag af-unspec esn
	auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
	enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
	lastused 2024-08-29 12:33:12
	anti-replay esn context:
	 seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
	 replay_window 128, bitmap-length 4
	 00000000 00000000 00000000 000007ff
	dir in
	iptfs-opts drop-time 0 reorder-window 0
src 192.1.2.23 dst 192.1.8.22
	proto esp spi 0xcd561999 reqid 16393 mode iptfs
	replay-window 0 flag af-unspec esn
	auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
	enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
	lastused 2024-08-29 12:33:12
	anti-replay esn context:
	 seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
	 replay_window 0, bitmap-length 0
	dir out
	iptfs-opts init-delay 0 max-queue-size 0 pkt-size 0

Now running under gdb during a migrate I see __iptfs_init_state() called 
twice.

I got gdb backtraces to show the two calls during XFRM_MSG_MIGRATE.

First call to __iptfs_init_state(), with backtrace. This is during clone/MIGRATE.

#0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=xtfs@entry=0xffff88810e275000)
    at net/xfrm/xfrm_iptfs.c:2674
#1  0xffffffff81ece552 in iptfs_clone (x=0xffff888110a1fc40, orig=<optimized out>)
    at net/xfrm/xfrm_iptfs.c:2722
#2  0xffffffff81eb65ad in xfrm_state_clone (encap=0xffffffff00000010, orig=0xffff888110a1e040)
    at net/xfrm/xfrm_state.c:1878
#3  xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1948
#4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
#5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
#6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, nlh=<optimized out>,
    extack=<optimized out>) at net/xfrm/xfrm_user.c:3389
---
Second call to __iptfs_init_state(), with backtrace.

#0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=0xffff88810e272000) at net/xfrm/xfrm_iptfs.c:2674
#1  0xffffffff81ece1a4 in iptfs_create_state (x=0xffff888110a1fc40) at net/xfrm/xfrm_iptfs.c:2742
#2  0xffffffff81eb5c61 in xfrm_init_state (x=x@entry=0xffff888110a1fc40) at net/xfrm/xfrm_state.c:3042
#3  0xffffffff81eb65dc in xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1954
#4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
#5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
#6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, 
nlh=<optimized out>,

I have a proposed fix against v10 that seems to work; see the attached
patch. The patch is applied on top of the series.

-antony

PS: this exact issue was also reported in:
https://www.spinics.net/lists/netdev/msg976146.html
>From fced06475e82f328aede0370d26336bc8a48c333 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Thu, 29 Aug 2024 13:23:42 +0200
Subject: [PATCH] call iptfs state init only once during cloning.

Signed-off-by: Antony Antony <antony.antony@secunet.com>
---
 net/xfrm/xfrm_iptfs.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
index 7f7b3078ca70..aa18ee4733f8 100644
--- a/net/xfrm/xfrm_iptfs.c
+++ b/net/xfrm/xfrm_iptfs.c
@@ -2722,9 +2722,13 @@ static int iptfs_create_state(struct xfrm_state *x)
 {
 	struct xfrm_iptfs_data *xtfs;
 
-	xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
-	if (!xtfs)
-		return -ENOMEM;
+	if (x->mode_data) {
+		xtfs = x->mode_data;
+	} else {
+		xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
+		if (!xtfs)
+			return -ENOMEM;
+	}
 
 	__iptfs_init_state(x, xtfs);
Christian Hopps Sept. 7, 2024, 3:04 a.m. UTC | #2
> On Aug 29, 2024, at 07:43, Antony Antony via Devel <devel@linux-ipsec.org> wrote:
> 
> On Fri, Aug 23, 2024 at 10:20:45PM -0400, Christian Hopps wrote:
>> From: Christian Hopps <chopps@labn.net>
>> 
>> Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347.

[...]

>> +static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig)
>> +{
>> + struct xfrm_iptfs_data *xtfs;
>> +
>> + xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
>> + if (!xtfs)
>> + return -ENOMEM;
>> +
>> + __iptfs_init_state(x, xtfs);
> 
> I noticed __iptfs_init_state() is called twice during XFRM_MSG_MIGRATE.
> This, the first, call does the right thing. However, the second call resets 
> the iptfs values to zero.

Fixed in patchset v11.

Thanks,
Chris.

> 
> While testing I noticed clone is not workig as expected. It seems to reset 
> values iptfs. See the "ip x s"  out before and after clone.
> 
> Here are two "ip x s"  output one before clone and another after clone noice 
> iptfs values are 0, while before max-queue-size 10485760
> 
> root@east:/testing/pluto/ikev2-mobike-01$ip x s
> src 192.1.2.23 dst 192.1.3.33
> proto esp spi 0xcd561999 reqid 16393 mode iptfs
> replay-window 0 flag af-unspec esn
> auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
> enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
> lastused 2024-08-29 12:33:12
> anti-replay esn context:
>  seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
>  replay_window 0, bitmap-length 0
> dir out
> iptfs-opts dont-frag init-delay 0 max-queue-size 10485760 pkt-size 0
> src 192.1.3.33 dst 192.1.2.23
> proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
> replay-window 0 flag af-unspec esn
> auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
> enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
> lastused 2024-08-29 12:33:12
> anti-replay esn context:
>  seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
>  replay_window 128, bitmap-length 4
>  00000000 00000000 00000000 000007ff
> dir in
> iptfs-opts drop-time 3 reorder-window 3
> 
> After migrate: note iptfs vallues are 0.
> 
> root@east:/testing/pluto/ikev2-mobike-01$ip x s
> src 192.1.8.22 dst 192.1.2.23
> proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
> replay-window 0 flag af-unspec esn
> auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
> enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
> lastused 2024-08-29 12:33:12
> anti-replay esn context:
>  seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
>  replay_window 128, bitmap-length 4
>  00000000 00000000 00000000 000007ff
> dir in
> iptfs-opts drop-time 0 reorder-window 0
> src 192.1.2.23 dst 192.1.8.22
> proto esp spi 0xcd561999 reqid 16393 mode iptfs
> replay-window 0 flag af-unspec esn
> auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
> enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
> lastused 2024-08-29 12:33:12
> anti-replay esn context:
>  seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
>  replay_window 0, bitmap-length 0
> dir out
> iptfs-opts init-delay 0 max-queue-size 0 pkt-size 0
> 
> Now running under gdb during a migrate I see __iptfs_init_state() called 
> twice.
> 
> I got gdb back trace to show the two calls during XFRM_MSG_MIGRATE.
> 
> First call __iptfs_init_state() with bt. This is during clone/MIGRATE.
> 
> #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=xtfs@entry=0xffff88810e275000)
>    at net/xfrm/xfrm_iptfs.c:2674
> #1  0xffffffff81ece552 in iptfs_clone (x=0xffff888110a1fc40, orig=<optimized out>)
>    at net/xfrm/xfrm_iptfs.c:2722
> #2  0xffffffff81eb65ad in xfrm_state_clone (encap=0xffffffff00000010, orig=0xffff888110a1e040)
>    at net/xfrm/xfrm_state.c:1878
> #3  xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
>    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1948
> #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
>    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
>    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
>    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
> #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
>    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
> #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, nlh=<optimized out>,
>    extack=<optimized out>) at net/xfrm/xfrm_user.c:3389
> ---
> second call to __iptfs_init_state() bt.
> 
> #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=0xffff88810e272000) at net/xfrm/xfrm_iptfs.c:2674
> #1  0xffffffff81ece1a4 in iptfs_create_state (x=0xffff888110a1fc40) at net/xfrm/xfrm_iptfs.c:2742
> #2  0xffffffff81eb5c61 in xfrm_init_state (x=x@entry=0xffff888110a1fc40) at net/xfrm/xfrm_state.c:3042
> #3  0xffffffff81eb65dc in xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
>    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1954
> #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
>    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
>    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
>    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
> #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
>    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
> #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, 
> nlh=<optimized out>,
> 
> I have a proposed fix against v10, that seems to work. see the attached 
> patch. The patch is applied top of the series.
> 
> -antony
> 
> PS: this exact issue was also reported in:
> https://www.spinics.net/lists/netdev/msg976146.html
> <0001-call-iptfs-state-init-only-once-during-cloning.patch>-- 
> Devel mailing list
> Devel@linux-ipsec.org
> https://linux-ipsec.org/mailman/listinfo/devel
Antony Antony Sept. 9, 2024, 4:32 p.m. UTC | #3
On Fri, Sep 06, 2024 at 11:04:45PM -0400, Christian Hopps via Devel wrote:
> 
> 
> > On Aug 29, 2024, at 07:43, Antony Antony via Devel <devel@linux-ipsec.org> wrote:
> > 
> > On Fri, Aug 23, 2024 at 10:20:45PM -0400, Christian Hopps wrote:
> >> From: Christian Hopps <chopps@labn.net>
> >> 
> >> Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347.
> 
> [...]
> 
> >> +static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig)
> >> +{
> >> + struct xfrm_iptfs_data *xtfs;
> >> +
> >> + xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
> >> + if (!xtfs)
> >> + return -ENOMEM;
> >> +
> >> + __iptfs_init_state(x, xtfs);
> > 
> > I noticed __iptfs_init_state() is called twice during XFRM_MSG_MIGRATE.
> > This, the first, call does the right thing. However, the second call resets 
> > the iptfs values to zero.
> 
> Fixed in patchset v11.

thanks Chris.

I notice an unconditional memory alloc in iptfs_init_state()
xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);

That is not what I tested. It looks odd.  Did you miss a hunk during the git 
rebase, or did you change the code?
I didn't test v11 yet.

-antony


> 
> Thanks,
> Chris.
> 
> > 
> > While testing I noticed clone is not workig as expected. It seems to reset 
> > values iptfs. See the "ip x s"  out before and after clone.
> > 
> > Here are two "ip x s"  output one before clone and another after clone noice 
> > iptfs values are 0, while before max-queue-size 10485760
> > 
> > root@east:/testing/pluto/ikev2-mobike-01$ip x s
> > src 192.1.2.23 dst 192.1.3.33
> > proto esp spi 0xcd561999 reqid 16393 mode iptfs
> > replay-window 0 flag af-unspec esn
> > auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
> > enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
> > lastused 2024-08-29 12:33:12
> > anti-replay esn context:
> >  seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
> >  replay_window 0, bitmap-length 0
> > dir out
> > iptfs-opts dont-frag init-delay 0 max-queue-size 10485760 pkt-size 0
> > src 192.1.3.33 dst 192.1.2.23
> > proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
> > replay-window 0 flag af-unspec esn
> > auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
> > enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
> > lastused 2024-08-29 12:33:12
> > anti-replay esn context:
> >  seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
> >  replay_window 128, bitmap-length 4
> >  00000000 00000000 00000000 000007ff
> > dir in
> > iptfs-opts drop-time 3 reorder-window 3
> > 
> > After migrate: note iptfs vallues are 0.
> > 
> > root@east:/testing/pluto/ikev2-mobike-01$ip x s
> > src 192.1.8.22 dst 192.1.2.23
> > proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
> > replay-window 0 flag af-unspec esn
> > auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
> > enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
> > lastused 2024-08-29 12:33:12
> > anti-replay esn context:
> >  seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
> >  replay_window 128, bitmap-length 4
> >  00000000 00000000 00000000 000007ff
> > dir in
> > iptfs-opts drop-time 0 reorder-window 0
> > src 192.1.2.23 dst 192.1.8.22
> > proto esp spi 0xcd561999 reqid 16393 mode iptfs
> > replay-window 0 flag af-unspec esn
> > auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
> > enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
> > lastused 2024-08-29 12:33:12
> > anti-replay esn context:
> >  seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
> >  replay_window 0, bitmap-length 0
> > dir out
> > iptfs-opts init-delay 0 max-queue-size 0 pkt-size 0
> > 
> > Now running under gdb during a migrate I see __iptfs_init_state() called 
> > twice.
> > 
> > I got gdb back trace to show the two calls during XFRM_MSG_MIGRATE.
> > 
> > First call __iptfs_init_state() with bt. This is during clone/MIGRATE.
> > 
> > #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=xtfs@entry=0xffff88810e275000)
> >    at net/xfrm/xfrm_iptfs.c:2674
> > #1  0xffffffff81ece552 in iptfs_clone (x=0xffff888110a1fc40, orig=<optimized out>)
> >    at net/xfrm/xfrm_iptfs.c:2722
> > #2  0xffffffff81eb65ad in xfrm_state_clone (encap=0xffffffff00000010, orig=0xffff888110a1e040)
> >    at net/xfrm/xfrm_state.c:1878
> > #3  xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
> >    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1948
> > #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
> >    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
> >    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
> >    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
> > #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
> >    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
> > #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, nlh=<optimized out>,
> >    extack=<optimized out>) at net/xfrm/xfrm_user.c:3389
> > ---
> > second call to __iptfs_init_state() bt.
> > 
> > #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=0xffff88810e272000) at net/xfrm/xfrm_iptfs.c:2674
> > #1  0xffffffff81ece1a4 in iptfs_create_state (x=0xffff888110a1fc40) at net/xfrm/xfrm_iptfs.c:2742
> > #2  0xffffffff81eb5c61 in xfrm_init_state (x=x@entry=0xffff888110a1fc40) at net/xfrm/xfrm_state.c:3042
> > #3  0xffffffff81eb65dc in xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
> >    encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1954
> > #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
> >    type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
> >    k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
> >    extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
> > #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
> >    attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
> > #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, 
> > nlh=<optimized out>,
> > 
> > I have a proposed fix against v10, that seems to work. see the attached 
> > patch. The patch is applied top of the series.
> > 
> > -antony
> > 
> > PS: this exact issue was also reported in:
> > https://www.spinics.net/lists/netdev/msg976146.html
> > <0001-call-iptfs-state-init-only-once-during-cloning.patch>-- 
> > Devel mailing list
> > Devel@linux-ipsec.org
> > https://linux-ipsec.org/mailman/listinfo/devel
> 
> 
> -- 
> Devel mailing list
> Devel@linux-ipsec.org
> https://linux-ipsec.org/mailman/listinfo/devel
Christian Hopps Sept. 9, 2024, 5:53 p.m. UTC | #4
> On Sep 9, 2024, at 12:32, Antony Antony <antony@phenome.org> wrote:
> 
> On Fri, Sep 06, 2024 at 11:04:45PM -0400, Christian Hopps via Devel wrote:
>> 
>> 
>>> On Aug 29, 2024, at 07:43, Antony Antony via Devel <devel@linux-ipsec.org> wrote:
>>> 
>>> On Fri, Aug 23, 2024 at 10:20:45PM -0400, Christian Hopps wrote:
>>>> From: Christian Hopps <chopps@labn.net>
>>>> 
>>>> Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347.
>> 
>> [...]
>> 
>>>> +static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig)
>>>> +{
>>>> + struct xfrm_iptfs_data *xtfs;
>>>> +
>>>> + xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
>>>> + if (!xtfs)
>>>> + return -ENOMEM;
>>>> +
>>>> + __iptfs_init_state(x, xtfs);
>>> 
>>> I noticed __iptfs_init_state() is called twice during XFRM_MSG_MIGRATE.
>>> This, the first, call does the right thing. However, the second call resets 
>>> the iptfs values to zero.
>> 
>> Fixed in patchset v11.
> 
> thanks Chris.
> 
> I notice an unconditional memory alloc in iptfs_init_state()
> xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
> 
> That is not what I tested. It looks odd.  Did you miss a hunk during the git 
> rebase, or did you change the code?
> I didn't test v11 yet.

Hmm, this was removed when I removed the call to `__iptfs_init_state()` inside `iptfs_clone_state()`; however, you are correct that the check is still needed. I will have to publish a v12 to add this back. It would be great if it were easier to test migration, but that's another project.

Thanks,
Chris.

> 
> -antony
> 
> 
>> 
>> Thanks,
>> Chris.
>> 
>>> 
>>> While testing I noticed clone is not workig as expected. It seems to reset 
>>> values iptfs. See the "ip x s"  out before and after clone.
>>> 
>>> Here are two "ip x s"  output one before clone and another after clone noice 
>>> iptfs values are 0, while before max-queue-size 10485760
>>> 
>>> root@east:/testing/pluto/ikev2-mobike-01$ip x s
>>> src 192.1.2.23 dst 192.1.3.33
>>> proto esp spi 0xcd561999 reqid 16393 mode iptfs
>>> replay-window 0 flag af-unspec esn
>>> auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
>>> enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
>>> lastused 2024-08-29 12:33:12
>>> anti-replay esn context:
>>> seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
>>> replay_window 0, bitmap-length 0
>>> dir out
>>> iptfs-opts dont-frag init-delay 0 max-queue-size 10485760 pkt-size 0
>>> src 192.1.3.33 dst 192.1.2.23
>>> proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
>>> replay-window 0 flag af-unspec esn
>>> auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
>>> enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
>>> lastused 2024-08-29 12:33:12
>>> anti-replay esn context:
>>> seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
>>> replay_window 128, bitmap-length 4
>>> 00000000 00000000 00000000 000007ff
>>> dir in
>>> iptfs-opts drop-time 3 reorder-window 3
>>> 
>>> After migrate: note iptfs vallues are 0.
>>> 
>>> root@east:/testing/pluto/ikev2-mobike-01$ip x s
>>> src 192.1.8.22 dst 192.1.2.23
>>> proto esp spi 0xd9ecf873 reqid 16393 mode iptfs
>>> replay-window 0 flag af-unspec esn
>>> auth-trunc hmac(sha256) 0xf841c6643a06186e86a856600e071e2a220450943fdf7b64a8d2f3e3bffd6c62 128
>>> enc cbc(aes) 0x5ffa993bbc568ecab82e15433b14c03e5da18ca4d216137493d552260bef0be1
>>> lastused 2024-08-29 12:33:12
>>> anti-replay esn context:
>>> seq-hi 0x0, seq 0xb, oseq-hi 0x0, oseq 0x0
>>> replay_window 128, bitmap-length 4
>>> 00000000 00000000 00000000 000007ff
>>> dir in
>>> iptfs-opts drop-time 0 reorder-window 0
>>> src 192.1.2.23 dst 192.1.8.22
>>> proto esp spi 0xcd561999 reqid 16393 mode iptfs
>>> replay-window 0 flag af-unspec esn
>>> auth-trunc hmac(sha256) 0xcba08c655b22df167c9bf16ac8005cffbe15e6baab553b8f48ec0056c037c51f 128
>>> enc cbc(aes) 0xb3702487e95675713e7dfb738cc21c5dd86a666af38cdabcc3705ed30fea92e2
>>> lastused 2024-08-29 12:33:12
>>> anti-replay esn context:
>>> seq-hi 0x0, seq 0x0, oseq-hi 0x0, oseq 0xb
>>> replay_window 0, bitmap-length 0
>>> dir out
>>> iptfs-opts init-delay 0 max-queue-size 0 pkt-size 0
>>> 
>>> Now running under gdb during a migrate I see __iptfs_init_state() called 
>>> twice.
>>> 
>>> I got gdb back trace to show the two calls during XFRM_MSG_MIGRATE.
>>> 
>>> First call __iptfs_init_state() with bt. This is during clone/MIGRATE.
>>> 
>>> #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=xtfs@entry=0xffff88810e275000)
>>>   at net/xfrm/xfrm_iptfs.c:2674
>>> #1  0xffffffff81ece552 in iptfs_clone (x=0xffff888110a1fc40, orig=<optimized out>)
>>>   at net/xfrm/xfrm_iptfs.c:2722
>>> #2  0xffffffff81eb65ad in xfrm_state_clone (encap=0xffffffff00000010, orig=0xffff888110a1e040)
>>>   at net/xfrm/xfrm_state.c:1878
>>> #3  xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
>>>   encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1948
>>> #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
>>>   type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
>>>   k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
>>>   extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
>>> #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
>>>   attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
>>> #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, nlh=<optimized out>,
>>>   extack=<optimized out>) at net/xfrm/xfrm_user.c:3389
>>> ---
>>> second call to __iptfs_init_state() bt.
>>> 
>>> #0  __iptfs_init_state (x=x@entry=0xffff888110a1fc40, xtfs=0xffff88810e272000) at net/xfrm/xfrm_iptfs.c:2674
>>> #1  0xffffffff81ece1a4 in iptfs_create_state (x=0xffff888110a1fc40) at net/xfrm/xfrm_iptfs.c:2742
>>> #2  0xffffffff81eb5c61 in xfrm_init_state (x=x@entry=0xffff888110a1fc40) at net/xfrm/xfrm_state.c:3042
>>> #3  0xffffffff81eb65dc in xfrm_state_migrate (x=x@entry=0xffff888110a1e040, m=m@entry=0xffffc90001b47400,
>>>   encap=encap@entry=0x0 <fixed_percpu_data>) at net/xfrm/xfrm_state.c:1954
>>> #4  0xffffffff81ea9206 in xfrm_migrate (sel=sel@entry=0xffff88811193ce50, dir=<optimized out>,
>>>   type=type@entry=0 '\000', m=m@entry=0xffffc90001b47400, num_migrate=num_migrate@entry=1,
>>>   k=k@entry=0x0 <fixed_percpu_data>, net=<optimized out>, encap=<optimized out>, if_id=<optimized out>,
>>>   extack=<optimized out>) at net/xfrm/xfrm_policy.c:4652
>>> #5  0xffffffff81ec26de in xfrm_do_migrate (skb=skb@entry=0xffff888109265000, nlh=<optimized out>,
>>>   attrs=attrs@entry=0xffffc90001b47730, extack=<optimized out>) at net/xfrm/xfrm_user.c:3047
>>> #6  0xffffffff81ec3e70 in xfrm_user_rcv_msg (skb=0xffff888109265000, 
>>> nlh=<optimized out>,
>>> 
>>> I have a proposed fix against v10, that seems to work. see the attached 
>>> patch. The patch is applied top of the series.
>>> 
>>> -antony
>>> 
>>> PS: this exact issue was also reported in:
>>> https://www.spinics.net/lists/netdev/msg976146.html
>>> <0001-call-iptfs-state-init-only-once-during-cloning.patch>-- 
>>> Devel mailing list
>>> Devel@linux-ipsec.org
>>> https://linux-ipsec.org/mailman/listinfo/devel
>> 
>> 
>> -- 
>> Devel mailing list
>> Devel@linux-ipsec.org
>> https://linux-ipsec.org/mailman/listinfo/devel
diff mbox series

Patch

diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 512e0b2f8514..5a1787587cb3 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,5 +21,6 @@  obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
+obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
 obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
 obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
new file mode 100644
index 000000000000..201406175d17
--- /dev/null
+++ b/net/xfrm/xfrm_iptfs.c
@@ -0,0 +1,210 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* xfrm_iptfs: IPTFS encapsulation support
+ *
+ * April 21 2022, Christian Hopps <chopps@labn.net>
+ *
+ * Copyright (c) 2022, LabN Consulting, L.L.C.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/icmpv6.h>
+#include <net/gro.h>
+#include <net/icmp.h>
+#include <net/ip6_route.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+
+#include <crypto/aead.h>
+
+#include "xfrm_inout.h"
+
+/**
+ * struct xfrm_iptfs_config - configuration for the IPTFS tunnel.
+ * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery,
+ *	otherwise the user specified value (taken from the
+ *	XFRMA_IPTFS_PKT_SIZE netlink attribute in iptfs_user_init()).
+ */
+struct xfrm_iptfs_config {
+	u32 pkt_size;	    /* outer_packet_size or 0 */
+};
+
+/**
+ * struct xfrm_iptfs_data - mode specific xfrm state.
+ * @cfg: IPTFS tunnel config.
+ * @x: owning SA (xfrm_state).
+ * @payload_mtu: max payload size; set by iptfs_user_init() to the configured
+ *	packet size less the SA header length, or to 0 when the configured
+ *	packet size is 0.
+ *
+ * One instance is allocated per SA and stored in the state's mode_data
+ * pointer.
+ */
+struct xfrm_iptfs_data {
+	struct xfrm_iptfs_config cfg;
+
+	/* Ingress User Input */
+	struct xfrm_state *x;	    /* owning state */
+	u32 payload_mtu;	    /* max payload size */
+};
+
+/* ========================== */
+/* State Management Functions */
+/* ========================== */
+
+/**
+ * iptfs_get_inner_mtu() - compute the inner MTU that needs no fragmentation.
+ * @x: xfrm state.
+ * @outer_mtu: the outer mtu
+ *
+ * Return: @outer_mtu less the SA header length and the AEAD auth size, masked
+ * down to the AEAD block size (itself aligned to 4 bytes), minus 2.
+ */
+static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
+{
+	struct crypto_aead *tfm = x->data;
+	u32 align = ALIGN(crypto_aead_blocksize(tfm), 4);
+	u32 room = outer_mtu - x->props.header_len - crypto_aead_authsize(tfm);
+
+	/* NOTE(review): the trailing 2 presumably covers the ESP pad-length
+	 * and next-header bytes - confirm.
+	 */
+	return (room & ~(align - 1)) - 2;
+}
+
+/**
+ * iptfs_user_init() - apply the IPTFS options received over netlink to the SA.
+ * @net: the net data
+ * @x: xfrm state
+ * @attrs: netlink attributes
+ * @extack: extack return data
+ *
+ * Only XFRMA_IPTFS_PKT_SIZE is consumed. The value is stored as given; a
+ * non-zero value must exceed the SA header length, and the difference becomes
+ * the payload MTU.
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_user_init(struct net *net, struct xfrm_state *x,
+			   struct nlattr **attrs,
+			   struct netlink_ext_ack *extack)
+{
+	struct xfrm_iptfs_data *xtfs = x->mode_data;
+	struct nlattr *size_attr = attrs[XFRMA_IPTFS_PKT_SIZE];
+	u32 size;
+
+	/* Nothing to configure when the attribute is absent */
+	if (!size_attr)
+		return 0;
+
+	size = nla_get_u32(size_attr);
+	xtfs->cfg.pkt_size = size;
+
+	if (!size) {
+		xtfs->payload_mtu = 0;
+		return 0;
+	}
+
+	if (size <= x->props.header_len) {
+		NL_SET_ERR_MSG(extack,
+			       "Packet size must be 0 or greater than IPTFS/ESP header length");
+		return -EINVAL;
+	}
+
+	xtfs->payload_mtu = size - x->props.header_len;
+	return 0;
+}
+
+/*
+ * Size of the netlink attributes iptfs_copy_to_user() emits for @x: room for
+ * the packet size attribute on output SAs, nothing otherwise.
+ */
+static unsigned int iptfs_sa_len(const struct xfrm_state *x)
+{
+	struct xfrm_iptfs_data *xtfs = x->mode_data;
+
+	if (x->dir != XFRM_SA_DIR_OUT)
+		return 0;
+
+	return nla_total_size(sizeof(xtfs->cfg.pkt_size));
+}
+
+/*
+ * Dump the IPTFS configuration of @x into @skb. Only output SAs carry the
+ * packet size attribute; for any other direction nothing is written and 0 is
+ * returned. Otherwise returns the result of nla_put_u32().
+ */
+static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct xfrm_iptfs_data *xtfs = x->mode_data;
+
+	if (x->dir != XFRM_SA_DIR_OUT)
+		return 0;
+
+	return nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xtfs->cfg.pkt_size);
+}
+
+/*
+ * Attach @xtfs to @x as its mode data, take a module reference for it, and
+ * grow the type (esp) header length by the outer IP header plus the IPTFS
+ * header.
+ */
+static void __iptfs_init_state(struct xfrm_state *x,
+			       struct xfrm_iptfs_data *xtfs)
+{
+	/* Modify type (esp) adjustment values */
+	switch (x->props.family) {
+	case AF_INET:
+		x->props.header_len += sizeof(struct iphdr) + sizeof(struct ip_iptfs_hdr);
+		break;
+	case AF_INET6:
+		x->props.header_len += sizeof(struct ipv6hdr) + sizeof(struct ip_iptfs_hdr);
+		break;
+	default:
+		/* Other families: header_len is left untouched */
+		break;
+	}
+	x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
+
+	/* Always keep a module reference when x->mode_data is set */
+	__module_get(x->mode_cbs->owner);
+
+	xtfs->x = x;
+	x->mode_data = xtfs;
+}
+
+/**
+ * iptfs_clone() - copy the IPTFS mode data of @orig into the new SA @x.
+ * @x: new xfrm state receiving the copy
+ * @orig: xfrm state being cloned
+ *
+ * Only duplicates the mode data and attaches it to @x. The header length
+ * adjustment and the module reference are deliberately left to
+ * iptfs_create_state(): the migrate path runs xfrm_init_state() on the clone
+ * afterwards, so doing the full state init here as well would apply it twice.
+ *
+ * Return: 0 on success or -ENOMEM if the copy cannot be allocated
+ */
+static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig)
+{
+	struct xfrm_iptfs_data *xtfs;
+
+	xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
+	if (!xtfs)
+		return -ENOMEM;
+
+	x->mode_data = xtfs;
+	xtfs->x = x;
+
+	return 0;
+}
+
+/**
+ * iptfs_create_state() - set up the IPTFS mode data for an SA.
+ * @x: xfrm state
+ *
+ * Allocates zeroed mode data unless @x already carries some (a state that
+ * came through iptfs_clone()), then performs the one-time state init.
+ *
+ * Return: 0 on success or -ENOMEM if the mode data cannot be allocated
+ */
+static int iptfs_create_state(struct xfrm_state *x)
+{
+	struct xfrm_iptfs_data *xtfs = x->mode_data;
+
+	/* A cloned state arrives with its mode data already attached; reuse it
+	 * instead of leaking it and discarding the copied configuration.
+	 * NOTE(review): relies on x->mode_data being NULL for a freshly
+	 * allocated state - confirm against the state allocator.
+	 */
+	if (!xtfs) {
+		xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
+		if (!xtfs)
+			return -ENOMEM;
+	}
+
+	__iptfs_init_state(x, xtfs);
+
+	return 0;
+}
+
+/*
+ * Free the IPTFS mode data of @x, if any, and drop the module reference that
+ * is held while mode data is attached.
+ */
+static void iptfs_delete_state(struct xfrm_state *x)
+{
+	struct xfrm_iptfs_data *mode_data = x->mode_data;
+
+	if (mode_data) {
+		kfree_sensitive(mode_data);
+
+		module_put(x->mode_cbs->owner);
+	}
+}
+
+/* Mode callbacks registered for XFRM_MODE_IPTFS by xfrm_iptfs_init() */
+static const struct xfrm_mode_cbs iptfs_mode_cbs = {
+	.owner = THIS_MODULE,
+
+	/* state lifetime */
+	.create_state = iptfs_create_state,
+	.clone = iptfs_clone,
+	.delete_state = iptfs_delete_state,
+
+	/* netlink configuration and dump */
+	.user_init = iptfs_user_init,
+	.sa_len = iptfs_sa_len,
+	.copy_to_user = iptfs_copy_to_user,
+
+	.get_inner_mtu = iptfs_get_inner_mtu,
+};
+
+/*
+ * Module entry point: register the IP-TFS mode callbacks. Returns the result
+ * of xfrm_register_mode_cbs(); a negative value is logged before returning.
+ */
+static int __init xfrm_iptfs_init(void)
+{
+	int ret;
+
+	pr_info("xfrm_iptfs: IPsec IP-TFS tunnel mode module\n");
+
+	ret = xfrm_register_mode_cbs(XFRM_MODE_IPTFS, &iptfs_mode_cbs);
+	if (ret >= 0)
+		return ret;
+
+	pr_info("%s: can't register IP-TFS\n", __func__);
+	return ret;
+}
+
+/* Module exit: unregister the IP-TFS mode callbacks added by xfrm_iptfs_init() */
+static void __exit xfrm_iptfs_fini(void)
+{
+	xfrm_unregister_mode_cbs(XFRM_MODE_IPTFS);
+}
+
+module_init(xfrm_iptfs_init);
+module_exit(xfrm_iptfs_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP-TFS support for xfrm ipsec tunnels");