Message ID | 20240714202246.1573817-10-chopps@chopps.org (mailing list archive) |
---|---|
State | Awaiting Upstream |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add IP-TFS mode to xfrm | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
On Sun, Jul 14, 2024 at 04:22:37PM -0400, Christian Hopps wrote: > From: Christian Hopps <chopps@labn.net> > > Add tunnel packet output functionality. This is code handles > the ingress to the tunnel. > > Signed-off-by: Christian Hopps <chopps@labn.net> > --- > net/xfrm/xfrm_iptfs.c | 535 +++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 532 insertions(+), 3 deletions(-) > > diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c ... > +static int iptfs_get_cur_pmtu(struct xfrm_state *x, > + struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) > +{ > + struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); > + u32 payload_mtu = xtfs->payload_mtu; > + u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); Hi Christian, Please consider arranging local variable declarations in Networking code in reverse xmas tree order - longest line to shortest. I think that in this case that would involve separating the declaration and assignment of pmtu. Edward Cree's tool can be helpful here: https://github.com/ecree-solarflare/xmastree > + > + if (payload_mtu && payload_mtu < pmtu) > + pmtu = payload_mtu; > + > + return pmtu; > +} ... > +/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload > + * (i.e., aggregating or fragmenting as appropriate). > + * This is set in dst->output for an SA. > + */ > +static int iptfs_output_collect(struct net *net, struct sock *sk, > + struct sk_buff *skb) > +{ > + struct dst_entry *dst = skb_dst(skb); > + struct xfrm_state *x = dst->xfrm; > + struct xfrm_iptfs_data *xtfs = x->mode_data; > + struct sk_buff *segs, *nskb; > + u32 pmtu = 0; > + bool ok = true; > + bool was_gso; > + > + /* We have hooked into dst_entry->output which means we have skipped the > + * protocol specific netfilter (see xfrm4_output, xfrm6_output). > + * when our timer runs we will end up calling xfrm_output directly on > + * the encapsulated traffic. > + * > + * For both cases this is the NF_INET_POST_ROUTING hook which allows > + * changing the skb->dst entry which then may not be xfrm based anymore > + * in which case a REROUTED flag is set. and dst_output is called. > + * > + * For IPv6 we are also skipping fragmentation handling for local > + * sockets, which may or may not be good depending on our tunnel DF > + * setting. Normally with fragmentation supported we want to skip this > + * fragmentation. > + */ > + > + BUG_ON(!xtfs); > + > + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); > + > + /* Break apart GSO skbs. If the queue is nearing full then we want the > + * accounting and queuing to be based on the individual packets not on the > + * aggregate GSO buffer. > + */ > + was_gso = skb_is_gso(skb); > + if (!was_gso) { > + segs = skb; > + } else { > + segs = skb_gso_segment(skb, 0); > + if (IS_ERR_OR_NULL(segs)) { > + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); > + kfree_skb(skb); > + return PTR_ERR(segs); This will return 0 is skb_gso_segment returns NULL, which occurs if skb doesn't require segmentation. Is that intentional? If so, I wonder if it would be slightly nicer to use PTR_ERR_OR_ZERO() instead of PTR_ERR(). Flagged by Smatch (suggestion is my own). (suggestion is my own) > + } > + consume_skb(skb); > + skb = NULL; > + } > + > + /* We can be running on multiple cores and from the network softirq or > + * from user context depending on where the packet is coming from. > + */ > + spin_lock_bh(&x->lock); > + > + skb_list_walk_safe(segs, skb, nskb) > + { > + skb_mark_not_on_list(skb); > + > + /* Once we drop due to no queue space we continue to drop the > + * rest of the packets from that GRO. > + */ > + if (!ok) { > +nospace: > + if (skb->dev) > + XFRM_INC_STATS(dev_net(skb->dev), > + LINUX_MIB_XFRMOUTNOQSPACE); > + kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); > + continue; > + } > + > + /* Fragmenting handled in following commits. */ > + if (iptfs_is_too_big(sk, skb, pmtu)) { > + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); > + continue; > + } > + > + /* Enqueue to send in tunnel */ > + ok = iptfs_enqueue(xtfs, skb); > + if (!ok) > + goto nospace; > + } > + > + /* Start a delay timer if we don't have one yet */ > + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) { > + hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, > + IPTFS_HRTIMER_MODE); > + xtfs->iptfs_settime = ktime_get_raw_fast_ns(); > + } > + > + spin_unlock_bh(&x->lock); > + return 0; > +} ... > +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) > +{ > + struct sk_buff_head list; > + struct xfrm_iptfs_data *xtfs; > + struct xfrm_state *x; > + time64_t settime; > + > + xtfs = container_of(me, typeof(*xtfs), iptfs_timer); > + x = xtfs->x; > + > + /* Process all the queued packets > + * > + * softirq execution order: timer > tasklet > hrtimer > + * > + * Network rx will have run before us giving one last chance to queue > + * ingress packets for us to process and transmit. > + */ > + > + spin_lock(&x->lock); > + __skb_queue_head_init(&list); > + skb_queue_splice_init(&xtfs->queue, &list); > + xtfs->queue_size = 0; > + settime = xtfs->iptfs_settime; nit: settime is set but otherwise unused in this function. Flagged by W=1 x86_64 allmodconfig builds with gcc-14 and clang-18. > + spin_unlock(&x->lock); > + > + /* After the above unlock, packets can begin queuing again, and the > + * timer can be set again, from another CPU either in softirq or user > + * context (not from this one since we are running at softirq level > + * already). > + */ > + > + iptfs_output_queued(x, &list); > + > + return HRTIMER_NORESTART; > +} ... > @@ -98,10 +607,23 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) > { > struct xfrm_iptfs_data *xtfs = x->mode_data; > struct xfrm_iptfs_config *xc = &xtfs->cfg; > - int ret = 0; > + int ret; > + u64 q; > + > + if (x->dir == XFRM_SA_DIR_OUT) { > + q = xtfs->init_delay_ns; > + (void)do_div(q, NSECS_IN_USEC); > + ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); > + if (ret) > + return ret; > + > + ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, > + xc->max_queue_size); > + if (ret) > + return ret; > > - if (x->dir == XFRM_SA_DIR_OUT) > ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); > + } ret will be used uninitialised here unless the if condition above is true. Flagged by W=1 x86_64 allmodconfig build with clang-18, and Smatch. > > return ret; > } ...
Hi Christian, kernel test robot noticed the following build errors: [auto build test ERROR on klassert-ipsec-next/master] [also build test ERROR on next-20240716] [cannot apply to klassert-ipsec/master netfilter-nf/main linus/master nf-next/master v6.10] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Christian-Hopps/xfrm-config-add-CONFIG_XFRM_IPTFS/20240715-042948 base: https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master patch link: https://lore.kernel.org/r/20240714202246.1573817-10-chopps%40chopps.org patch subject: [PATCH ipsec-next v5 09/17] xfrm: iptfs: add user packet (tunnel ingress) handling config: i386-randconfig-061-20240716 (https://download.01.org/0day-ci/archive/20240716/202407162110.8Xuwy6GR-lkp@intel.com/config) compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240716/202407162110.8Xuwy6GR-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202407162110.8Xuwy6GR-lkp@intel.com/ All errors (new ones prefixed by >>): ld: net/xfrm/xfrm_iptfs.o: in function `iptfs_user_init': >> net/xfrm/xfrm_iptfs.c:586:(.text+0x2a0): undefined reference to `__udivdi3' vim +586 net/xfrm/xfrm_iptfs.c 547 548 /** 549 * iptfs_user_init() - initialize the SA with IPTFS options from netlink. 550 * @net: the net data 551 * @x: xfrm state 552 * @attrs: netlink attributes 553 * @extack: extack return data 554 */ 555 static int iptfs_user_init(struct net *net, struct xfrm_state *x, 556 struct nlattr **attrs, 557 struct netlink_ext_ack *extack) 558 { 559 struct xfrm_iptfs_data *xtfs = x->mode_data; 560 struct xfrm_iptfs_config *xc; 561 562 xc = &xtfs->cfg; 563 xc->max_queue_size = net->xfrm.sysctl_iptfs_max_qsize; 564 xtfs->init_delay_ns = 565 (u64)net->xfrm.sysctl_iptfs_init_delay * NSECS_IN_USEC; 566 567 if (attrs[XFRMA_IPTFS_PKT_SIZE]) { 568 xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); 569 if (!xc->pkt_size) { 570 xtfs->payload_mtu = 0; 571 } else if (xc->pkt_size > x->props.header_len) { 572 xtfs->payload_mtu = xc->pkt_size - x->props.header_len; 573 } else { 574 NL_SET_ERR_MSG(extack, 575 "Packet size must be 0 or greater than IPTFS/ESP header length"); 576 return -EINVAL; 577 } 578 } 579 if (attrs[XFRMA_IPTFS_MAX_QSIZE]) 580 xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]); 581 if (attrs[XFRMA_IPTFS_INIT_DELAY]) 582 xtfs->init_delay_ns = 583 (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * 584 NSECS_IN_USEC; 585 > 586 xtfs->ecn_queue_size = (u64)xc->max_queue_size * 95 / 100; 587 588 return 0; 589 } 590
Simon Horman via Devel <devel@linux-ipsec.org> writes: > On Sun, Jul 14, 2024 at 04:22:37PM -0400, Christian Hopps wrote: >> From: Christian Hopps <chopps@labn.net> >> >> Add tunnel packet output functionality. This is code handles >> the ingress to the tunnel. >> >> Signed-off-by: Christian Hopps <chopps@labn.net> >> --- >> net/xfrm/xfrm_iptfs.c | 535 +++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 532 insertions(+), 3 deletions(-) >> >> diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c > > ... > >> +static int iptfs_get_cur_pmtu(struct xfrm_state *x, >> + struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) >> +{ >> + struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); >> + u32 payload_mtu = xtfs->payload_mtu; >> + u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); > > Hi Christian, > > Please consider arranging local variable declarations in Networking > code in reverse xmas tree order - longest line to shortest. > I think that in this case that would involve separating the > declaration and assignment of pmtu. > > Edward Cree's tool can be helpful here: > https://github.com/ecree-solarflare/xmastree This does not appear to be a style that is required by the net/xfrm code. I verified this by running the above tool on the other files in net/xfrm/*.c. In this case I'd prefer to not increase the number of lines in the function in order to satisfy the optional style guideline. >> + >> + if (payload_mtu && payload_mtu < pmtu) >> + pmtu = payload_mtu; >> + >> + return pmtu; >> +} > > ... > >> +/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload >> + * (i.e., aggregating or fragmenting as appropriate). >> + * This is set in dst->output for an SA. >> + */ >> +static int iptfs_output_collect(struct net *net, struct sock *sk, >> + struct sk_buff *skb) >> +{ >> + struct dst_entry *dst = skb_dst(skb); >> + struct xfrm_state *x = dst->xfrm; >> + struct xfrm_iptfs_data *xtfs = x->mode_data; >> + struct sk_buff *segs, *nskb; >> + u32 pmtu = 0; >> + bool ok = true; >> + bool was_gso; >> + >> + /* We have hooked into dst_entry->output which means we have skipped the >> + * protocol specific netfilter (see xfrm4_output, xfrm6_output). >> + * when our timer runs we will end up calling xfrm_output directly on >> + * the encapsulated traffic. >> + * >> + * For both cases this is the NF_INET_POST_ROUTING hook which allows >> + * changing the skb->dst entry which then may not be xfrm based anymore >> + * in which case a REROUTED flag is set. and dst_output is called. >> + * >> + * For IPv6 we are also skipping fragmentation handling for local >> + * sockets, which may or may not be good depending on our tunnel DF >> + * setting. Normally with fragmentation supported we want to skip this >> + * fragmentation. >> + */ >> + >> + BUG_ON(!xtfs); >> + >> + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); >> + >> + /* Break apart GSO skbs. If the queue is nearing full then we want the >> + * accounting and queuing to be based on the individual packets not on the >> + * aggregate GSO buffer. >> + */ >> + was_gso = skb_is_gso(skb); >> + if (!was_gso) { >> + segs = skb; >> + } else { >> + segs = skb_gso_segment(skb, 0); >> + if (IS_ERR_OR_NULL(segs)) { >> + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); >> + kfree_skb(skb); >> + return PTR_ERR(segs); > > This will return 0 is skb_gso_segment returns NULL, > which occurs if skb doesn't require segmentation. > Is that intentional? > > If so, I wonder if it would be slightly nicer > to use PTR_ERR_OR_ZERO() instead of PTR_ERR(). > > Flagged by Smatch (suggestion is my own). > (suggestion is my own) Thanks! Actually in the case of NULL I've changed the code to return -EINVAL as xfrm_output_gso() does. >> + } >> + consume_skb(skb); >> + skb = NULL; >> + } >> + >> + /* We can be running on multiple cores and from the network softirq or >> + * from user context depending on where the packet is coming from. >> + */ >> + spin_lock_bh(&x->lock); >> + >> + skb_list_walk_safe(segs, skb, nskb) >> + { >> + skb_mark_not_on_list(skb); >> + >> + /* Once we drop due to no queue space we continue to drop the >> + * rest of the packets from that GRO. >> + */ >> + if (!ok) { >> +nospace: >> + if (skb->dev) >> + XFRM_INC_STATS(dev_net(skb->dev), >> + LINUX_MIB_XFRMOUTNOQSPACE); >> + kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); >> + continue; >> + } >> + >> + /* Fragmenting handled in following commits. */ >> + if (iptfs_is_too_big(sk, skb, pmtu)) { >> + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); >> + continue; >> + } >> + >> + /* Enqueue to send in tunnel */ >> + ok = iptfs_enqueue(xtfs, skb); >> + if (!ok) >> + goto nospace; >> + } >> + >> + /* Start a delay timer if we don't have one yet */ >> + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) { >> + hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, >> + IPTFS_HRTIMER_MODE); >> + xtfs->iptfs_settime = ktime_get_raw_fast_ns(); >> + } >> + >> + spin_unlock_bh(&x->lock); >> + return 0; >> +} > > ... > >> +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) >> +{ >> + struct sk_buff_head list; >> + struct xfrm_iptfs_data *xtfs; >> + struct xfrm_state *x; >> + time64_t settime; >> + >> + xtfs = container_of(me, typeof(*xtfs), iptfs_timer); >> + x = xtfs->x; >> + >> + /* Process all the queued packets >> + * >> + * softirq execution order: timer > tasklet > hrtimer >> + * >> + * Network rx will have run before us giving one last chance to queue >> + * ingress packets for us to process and transmit. >> + */ >> + >> + spin_lock(&x->lock); >> + __skb_queue_head_init(&list); >> + skb_queue_splice_init(&xtfs->queue, &list); >> + xtfs->queue_size = 0; >> + settime = xtfs->iptfs_settime; > > nit: settime is set but otherwise unused in this function. > > Flagged by W=1 x86_64 allmodconfig builds with gcc-14 and clang-18. > Hmm, this value is in fact used inside a trace point function call in this function. >> + spin_unlock(&x->lock); >> + >> + /* After the above unlock, packets can begin queuing again, and the >> + * timer can be set again, from another CPU either in softirq or user >> + * context (not from this one since we are running at softirq level >> + * already). >> + */ >> + >> + iptfs_output_queued(x, &list); >> + >> + return HRTIMER_NORESTART; >> +} > > ... > >> @@ -98,10 +607,23 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) >> { >> struct xfrm_iptfs_data *xtfs = x->mode_data; >> struct xfrm_iptfs_config *xc = &xtfs->cfg; >> - int ret = 0; >> + int ret; >> + u64 q; >> + >> + if (x->dir == XFRM_SA_DIR_OUT) { >> + q = xtfs->init_delay_ns; >> + (void)do_div(q, NSECS_IN_USEC); >> + ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); >> + if (ret) >> + return ret; >> + >> + ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, >> + xc->max_queue_size); >> + if (ret) >> + return ret; >> >> - if (x->dir == XFRM_SA_DIR_OUT) >> ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); >> + } > > ret will be used uninitialised here unless the if condition above is true. > > Flagged by W=1 x86_64 allmodconfig build with clang-18, and Smatch. Sigh, this is an artifact of splitting up the new file into multiple functionality commits. The final resulting code does not leave the value uninitialized. In any case I will fix it in this middle commit too. Thanks! Chris. > >> >> return ret; >> } > ...
Christian Hopps <chopps@chopps.org> writes: > [[PGP Signed Part:Good signature from 2E1D830ED7B83025 Christian Hopps <chopps@gmail.com> (trust ultimate) created at 2024-07-17T22:55:46-0700 using RSA]] > > Simon Horman via Devel <devel@linux-ipsec.org> writes: > >> On Sun, Jul 14, 2024 at 04:22:37PM -0400, Christian Hopps wrote: >>> From: Christian Hopps <chopps@labn.net> >>> >>> Add tunnel packet output functionality. This is code handles >>> the ingress to the tunnel. >>> >>> Signed-off-by: Christian Hopps <chopps@labn.net> >>> --- >>> net/xfrm/xfrm_iptfs.c | 535 +++++++++++++++++++++++++++++++++++++++++- >>> 1 file changed, 532 insertions(+), 3 deletions(-) >>> >>> diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c >> >> ... >> >>> +static int iptfs_get_cur_pmtu(struct xfrm_state *x, >>> + struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) >>> +{ >>> + struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); >>> + u32 payload_mtu = xtfs->payload_mtu; >>> + u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); >> >> Hi Christian, >> >> Please consider arranging local variable declarations in Networking >> code in reverse xmas tree order - longest line to shortest. >> I think that in this case that would involve separating the >> declaration and assignment of pmtu. >> >> Edward Cree's tool can be helpful here: >> https://github.com/ecree-solarflare/xmastree > > This does not appear to be a style that is required by the net/xfrm code. I > verified this by running the above tool on the other files in net/xfrm/*.c. In > this case I'd prefer to not increase the number of lines in the function in > order to satisfy the optional style guideline. > >>> + >>> + if (payload_mtu && payload_mtu < pmtu) >>> + pmtu = payload_mtu; >>> + >>> + return pmtu; >>> +} >> >> ... >> >>> +/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload >>> + * (i.e., aggregating or fragmenting as appropriate). >>> + * This is set in dst->output for an SA. >>> + */ >>> +static int iptfs_output_collect(struct net *net, struct sock *sk, >>> + struct sk_buff *skb) >>> +{ >>> + struct dst_entry *dst = skb_dst(skb); >>> + struct xfrm_state *x = dst->xfrm; >>> + struct xfrm_iptfs_data *xtfs = x->mode_data; >>> + struct sk_buff *segs, *nskb; >>> + u32 pmtu = 0; >>> + bool ok = true; >>> + bool was_gso; >>> + >>> + /* We have hooked into dst_entry->output which means we have skipped the >>> + * protocol specific netfilter (see xfrm4_output, xfrm6_output). >>> + * when our timer runs we will end up calling xfrm_output directly on >>> + * the encapsulated traffic. >>> + * >>> + * For both cases this is the NF_INET_POST_ROUTING hook which allows >>> + * changing the skb->dst entry which then may not be xfrm based anymore >>> + * in which case a REROUTED flag is set. and dst_output is called. >>> + * >>> + * For IPv6 we are also skipping fragmentation handling for local >>> + * sockets, which may or may not be good depending on our tunnel DF >>> + * setting. Normally with fragmentation supported we want to skip this >>> + * fragmentation. >>> + */ >>> + >>> + BUG_ON(!xtfs); >>> + >>> + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); >>> + >>> + /* Break apart GSO skbs. If the queue is nearing full then we want the >>> + * accounting and queuing to be based on the individual packets not on the >>> + * aggregate GSO buffer. >>> + */ >>> + was_gso = skb_is_gso(skb); >>> + if (!was_gso) { >>> + segs = skb; >>> + } else { >>> + segs = skb_gso_segment(skb, 0); >>> + if (IS_ERR_OR_NULL(segs)) { >>> + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); >>> + kfree_skb(skb); >>> + return PTR_ERR(segs); >> >> This will return 0 is skb_gso_segment returns NULL, >> which occurs if skb doesn't require segmentation. >> Is that intentional? >> >> If so, I wonder if it would be slightly nicer >> to use PTR_ERR_OR_ZERO() instead of PTR_ERR(). >> >> Flagged by Smatch (suggestion is my own). >> (suggestion is my own) > > Thanks! Actually in the case of NULL I've changed the code to return -EINVAL as xfrm_output_gso() does. > >>> + } >>> + consume_skb(skb); >>> + skb = NULL; >>> + } >>> + >>> + /* We can be running on multiple cores and from the network softirq or >>> + * from user context depending on where the packet is coming from. >>> + */ >>> + spin_lock_bh(&x->lock); >>> + >>> + skb_list_walk_safe(segs, skb, nskb) >>> + { >>> + skb_mark_not_on_list(skb); >>> + >>> + /* Once we drop due to no queue space we continue to drop the >>> + * rest of the packets from that GRO. >>> + */ >>> + if (!ok) { >>> +nospace: >>> + if (skb->dev) >>> + XFRM_INC_STATS(dev_net(skb->dev), >>> + LINUX_MIB_XFRMOUTNOQSPACE); >>> + kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); >>> + continue; >>> + } >>> + >>> + /* Fragmenting handled in following commits. */ >>> + if (iptfs_is_too_big(sk, skb, pmtu)) { >>> + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); >>> + continue; >>> + } >>> + >>> + /* Enqueue to send in tunnel */ >>> + ok = iptfs_enqueue(xtfs, skb); >>> + if (!ok) >>> + goto nospace; >>> + } >>> + >>> + /* Start a delay timer if we don't have one yet */ >>> + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) { >>> + hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, >>> + IPTFS_HRTIMER_MODE); >>> + xtfs->iptfs_settime = ktime_get_raw_fast_ns(); >>> + } >>> + >>> + spin_unlock_bh(&x->lock); >>> + return 0; >>> +} >> >> ... >> >>> +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) >>> +{ >>> + struct sk_buff_head list; >>> + struct xfrm_iptfs_data *xtfs; >>> + struct xfrm_state *x; >>> + time64_t settime; >>> + >>> + xtfs = container_of(me, typeof(*xtfs), iptfs_timer); >>> + x = xtfs->x; >>> + >>> + /* Process all the queued packets >>> + * >>> + * softirq execution order: timer > tasklet > hrtimer >>> + * >>> + * Network rx will have run before us giving one last chance to queue >>> + * ingress packets for us to process and transmit. >>> + */ >>> + >>> + spin_lock(&x->lock); >>> + __skb_queue_head_init(&list); >>> + skb_queue_splice_init(&xtfs->queue, &list); >>> + xtfs->queue_size = 0; >>> + settime = xtfs->iptfs_settime; >> >> nit: settime is set but otherwise unused in this function. >> >> Flagged by W=1 x86_64 allmodconfig builds with gcc-14 and clang-18. >> > > Hmm, this value is in fact used inside a trace point function call in this function. Realized this is another artifact of splitting the commit, I've moved settime to the later tracepoint commit now. Thanks, Chris. > >>> + spin_unlock(&x->lock); >>> + >>> + /* After the above unlock, packets can begin queuing again, and the >>> + * timer can be set again, from another CPU either in softirq or user >>> + * context (not from this one since we are running at softirq level >>> + * already). >>> + */ >>> + >>> + iptfs_output_queued(x, &list); >>> + >>> + return HRTIMER_NORESTART; >>> +} >> >> ... >> >>> @@ -98,10 +607,23 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) >>> { >>> struct xfrm_iptfs_data *xtfs = x->mode_data; >>> struct xfrm_iptfs_config *xc = &xtfs->cfg; >>> - int ret = 0; >>> + int ret; >>> + u64 q; >>> + >>> + if (x->dir == XFRM_SA_DIR_OUT) { >>> + q = xtfs->init_delay_ns; >>> + (void)do_div(q, NSECS_IN_USEC); >>> + ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); >>> + if (ret) >>> + return ret; >>> + >>> + ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, >>> + xc->max_queue_size); >>> + if (ret) >>> + return ret; >>> >>> - if (x->dir == XFRM_SA_DIR_OUT) >>> ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); >>> + } >> >> ret will be used uninitialised here unless the if condition above is true. >> >> Flagged by W=1 x86_64 allmodconfig build with clang-18, and Smatch. > > Sigh, this is an artifact of splitting up the new file into multiple > functionality commits. The final resulting code does not leave the value > uninitialized. In any case I will fix it in this middle commit too. > > Thanks! > Chris. > >> >>> >>> return ret; >>> } >> ... > > [[End of PGP Signed Part]]
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 414035a7a208..82c9027b85f1 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -19,8 +19,13 @@ #include "xfrm_inout.h" +#define NSECS_IN_USEC 1000 + +#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT + struct xfrm_iptfs_config { u32 pkt_size; /* outer_packet_size or 0 */ + u32 max_queue_size; /* octets */ }; struct xfrm_iptfs_data { @@ -28,9 +33,498 @@ struct xfrm_iptfs_data { /* Ingress User Input */ struct xfrm_state *x; /* owning state */ + struct sk_buff_head queue; /* output queue */ + u32 queue_size; /* octets */ + u32 ecn_queue_size; /* octets above which ECN mark */ + u64 init_delay_ns; /* nanoseconds */ + struct hrtimer iptfs_timer; /* output timer */ + time64_t iptfs_settime; /* time timer was set */ u32 payload_mtu; /* max payload size */ }; +static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); + +/* ================================= */ +/* IPTFS Sending (ingress) Functions */ +/* ================================= */ + +/* ------------------------- */ +/* Enqueue to send functions */ +/* ------------------------- */ + +/** + * iptfs_enqueue() - enqueue packet if ok to send. + * @xtfs: xtfs state + * @skb: the packet + * + * Return: true if packet enqueued. + */ +static bool iptfs_enqueue(struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) +{ + u64 newsz = xtfs->queue_size + skb->len; + struct iphdr *iph; + + assert_spin_locked(&xtfs->x->lock); + + if (newsz > xtfs->cfg.max_queue_size) + return false; + + /* Set ECN CE if we are above our ECN queue threshold */ + if (newsz > xtfs->ecn_queue_size) { + iph = ip_hdr(skb); + if (iph->version == 4) + IP_ECN_set_ce(iph); + else if (iph->version == 6) + IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + } + + __skb_queue_tail(&xtfs->queue, skb); + xtfs->queue_size += skb->len; + return true; +} + +static int iptfs_get_cur_pmtu(struct xfrm_state *x, + struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); + u32 payload_mtu = xtfs->payload_mtu; + u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); + + if (payload_mtu && payload_mtu < pmtu) + pmtu = payload_mtu; + + return pmtu; +} + +static int iptfs_is_too_big(struct sock *sk, struct sk_buff *skb, u32 pmtu) +{ + if (skb->len <= pmtu) + return 0; + + /* We only send ICMP too big if the user has configured us as + * dont-fragment. + */ + if (skb->dev) + XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMOUTERROR); + + if (sk) { + xfrm_local_error(skb, pmtu); + } else if (ip_hdr(skb)->version == 4) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(pmtu)); + } else { + WARN_ON_ONCE(ip_hdr(skb)->version != 6); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu); + } + return 1; +} + +/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload + * (i.e., aggregating or fragmenting as appropriate). + * This is set in dst->output for an SA. + */ +static int iptfs_output_collect(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff *segs, *nskb; + u32 pmtu = 0; + bool ok = true; + bool was_gso; + + /* We have hooked into dst_entry->output which means we have skipped the + * protocol specific netfilter (see xfrm4_output, xfrm6_output). + * when our timer runs we will end up calling xfrm_output directly on + * the encapsulated traffic. + * + * For both cases this is the NF_INET_POST_ROUTING hook which allows + * changing the skb->dst entry which then may not be xfrm based anymore + * in which case a REROUTED flag is set. and dst_output is called. + * + * For IPv6 we are also skipping fragmentation handling for local + * sockets, which may or may not be good depending on our tunnel DF + * setting. Normally with fragmentation supported we want to skip this + * fragmentation. + */ + + BUG_ON(!xtfs); + + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); + + /* Break apart GSO skbs. If the queue is nearing full then we want the + * accounting and queuing to be based on the individual packets not on the + * aggregate GSO buffer. + */ + was_gso = skb_is_gso(skb); + if (!was_gso) { + segs = skb; + } else { + segs = skb_gso_segment(skb, 0); + if (IS_ERR_OR_NULL(segs)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return PTR_ERR(segs); + } + consume_skb(skb); + skb = NULL; + } + + /* We can be running on multiple cores and from the network softirq or + * from user context depending on where the packet is coming from. + */ + spin_lock_bh(&x->lock); + + skb_list_walk_safe(segs, skb, nskb) + { + skb_mark_not_on_list(skb); + + /* Once we drop due to no queue space we continue to drop the + * rest of the packets from that GRO. + */ + if (!ok) { +nospace: + if (skb->dev) + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMOUTNOQSPACE); + kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); + continue; + } + + /* Fragmenting handled in following commits. */ + if (iptfs_is_too_big(sk, skb, pmtu)) { + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); + continue; + } + + /* Enqueue to send in tunnel */ + ok = iptfs_enqueue(xtfs, skb); + if (!ok) + goto nospace; + } + + /* Start a delay timer if we don't have one yet */ + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) { + hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, + IPTFS_HRTIMER_MODE); + xtfs->iptfs_settime = ktime_get_raw_fast_ns(); + } + + spin_unlock_bh(&x->lock); + return 0; +} + +/* -------------------------- */ +/* Dequeue and send functions */ +/* -------------------------- */ + +static void iptfs_output_prepare_skb(struct sk_buff *skb, u32 blkoff) +{ + struct ip_iptfs_hdr *h; + size_t hsz = sizeof(*h); + + /* now reset values to be pointing at the rest of the packets */ + h = skb_push(skb, hsz); + memset(h, 0, hsz); + if (blkoff) + h->block_offset = htons(blkoff); + + /* network_header current points at the inner IP packet + * move it to the iptfs header + */ + skb->transport_header = skb->network_header; + skb->network_header -= hsz; + + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; +} + +static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, + struct sk_buff *child) +{ + u32 fllen = 0; + + /* It might be possible to account for a frag list in addition to page + * fragment if it's a valid state to be in. The page fragments size + * should be kept as data_len so only the frag_list size is removed, + * this must be done above as well. + */ + BUG_ON(skb_shinfo(child)->nr_frags); + *nextp = skb_shinfo(child)->frag_list; + while (*nextp) { + fllen += (*nextp)->len; + nextp = &(*nextp)->next; + } + skb_frag_list_init(child); + BUG_ON(fllen > child->data_len); + child->len -= fllen; + child->data_len -= fllen; + + return nextp; +} + +static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff *skb, *skb2, **nextp; + struct skb_shared_info *shi; + + while ((skb = __skb_dequeue(list))) { + u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb); + int remaining; + + /* protocol comes to us cleared sometimes */ + skb->protocol = x->outer_mode.family == AF_INET ? + htons(ETH_P_IP) : + htons(ETH_P_IPV6); + + if (skb->len > mtu) { + /* We handle this case before enqueueing so we are only + * here b/c MTU changed after we enqueued before we + * dequeued, just drop these. + */ + if (skb->dev) + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMOUTERROR); + + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); + continue; + } + + /* If we don't have a cksum in the packet we need to add one + * before encapsulation. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_help(skb)) { + XFRM_INC_STATS(dev_net(skb_dst(skb)->dev), + LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + continue; + } + } + + /* Convert first inner packet into an outer IPTFS packet */ + iptfs_output_prepare_skb(skb, 0); + + /* The space remaining to send more inner packet data is `mtu` - + * (skb->len - sizeof iptfs header). This is b/c the `mtu` value + * has the basic IPTFS header len accounted for, and we added + * that header to the skb so it is a part of skb->len, thus we + * subtract it from the skb length. + */ + remaining = mtu - (skb->len - sizeof(struct ip_iptfs_hdr)); + + /* Re-home (un-nest) nested fragment lists. We need to do this + * b/c we will simply be appending any following aggregated + * inner packets to the frag list. + */ + shi = skb_shinfo(skb); + nextp = &shi->frag_list; + while (*nextp) { + if (skb_has_frag_list(*nextp)) + nextp = iptfs_rehome_fraglist(&(*nextp)->next, + *nextp); + else + nextp = &(*nextp)->next; + } + + /* See if we have enough space to simply append. + * + * NOTE: Maybe do not append if we will be mis-aligned, + * SW-based endpoints will probably have to copy in this + * case. + */ + while ((skb2 = skb_peek(list))) { + if (skb2->len > remaining) + break; + + __skb_unlink(skb2, list); + + /* If we don't have a cksum in the packet we need to add + * one before encapsulation. + */ + if (skb2->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_help(skb2)) { + XFRM_INC_STATS( + dev_net(skb_dst(skb2)->dev), + LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb2); + continue; + } + } + + /* Do accounting */ + skb->data_len += skb2->len; + skb->len += skb2->len; + remaining -= skb2->len; + + /* Append to the frag_list */ + *nextp = skb2; + nextp = &skb2->next; + BUG_ON(*nextp); + if (skb_has_frag_list(skb2)) + nextp = iptfs_rehome_fraglist(nextp, skb2); + skb->truesize += skb2->truesize; + } + + xfrm_output(NULL, skb); + } +} + +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) +{ + struct sk_buff_head list; + struct xfrm_iptfs_data *xtfs; + struct xfrm_state *x; + time64_t settime; + + xtfs = container_of(me, typeof(*xtfs), iptfs_timer); + x = xtfs->x; + + /* Process all the queued packets + * + * softirq execution order: timer > tasklet > hrtimer + * + * Network rx will have run before us giving one last chance to queue + * ingress packets for us to process and transmit. + */ + + spin_lock(&x->lock); + __skb_queue_head_init(&list); + skb_queue_splice_init(&xtfs->queue, &list); + xtfs->queue_size = 0; + settime = xtfs->iptfs_settime; + spin_unlock(&x->lock); + + /* After the above unlock, packets can begin queuing again, and the + * timer can be set again, from another CPU either in softirq or user + * context (not from this one since we are running at softirq level + * already). + */ + + iptfs_output_queued(x, &list); + + return HRTIMER_NORESTART; +} + +/** + * iptfs_encap_add_ipv4() - add outer encaps + * @x: xfrm state + * @skb: the packet + * + * This was originally taken from xfrm4_tunnel_encap_add. The reason for the + * copy is that IP-TFS/AGGFRAG can have different functionality for how to set + * the TOS/DSCP bits. Sets the protocol to a different value and doesn't do + * anything with inner headers as they aren't pointing into a normal IP + * singleton inner packet. + */ +static int iptfs_encap_add_ipv4(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct iphdr *top_iph; + + skb_reset_inner_network_header(skb); + skb_reset_inner_transport_header(skb); + + skb_set_network_header(skb, + -(x->props.header_len - x->props.enc_hdr_len)); + skb->mac_header = + skb->network_header + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + top_iph = ip_hdr(skb); + top_iph->ihl = 5; + top_iph->version = 4; + top_iph->protocol = IPPROTO_AGGFRAG; + + /* As we have 0, fractional, 1 or N inner packets there's no obviously + * correct DSCP mapping to inherit. ECN should be cleared per RFC9347 + * 3.1. + */ + top_iph->tos = 0; + + top_iph->frag_off = htons(IP_DF); + top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + ip_select_ident(dev_net(dst->dev), skb, NULL); + + return 0; +} + +/** + * iptfs_encap_add_ipv6() - add outer encaps + * @x: xfrm state + * @skb: the packet + * + * This was originally taken from xfrm6_tunnel_encap_add. The reason for the + * copy is that IP-TFS/AGGFRAG can have different functionality for how to set + * the flow label and TOS/DSCP bits. It also sets the protocol to a different + * value and doesn't do anything with inner headers as they aren't pointing into + * a normal IP singleton inner packet. + */ +static int iptfs_encap_add_ipv6(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *top_iph; + int dsfield; + + skb_reset_inner_network_header(skb); + skb_reset_inner_transport_header(skb); + + skb_set_network_header(skb, + -x->props.header_len + x->props.enc_hdr_len); + skb->mac_header = + skb->network_header + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + top_iph = ipv6_hdr(skb); + top_iph->version = 6; + top_iph->priority = 0; + memset(top_iph->flow_lbl, 0, sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = IPPROTO_AGGFRAG; + + /* As we have 0, fractional, 1 or N inner packets there's no obviously + * correct DSCP mapping to inherit. ECN should be cleared per RFC9347 + * 3.1. + */ + dsfield = 0; + ipv6_change_dsfield(top_iph, 0, dsfield); + + top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + + return 0; +} + +/** + * iptfs_prepare_output() - prepare the skb for output + * @x: xfrm state + * @skb: the packet + * + * Return: Error value, if 0 then skb values should be as follows: + * - transport_header should point at ESP header + * - network_header should point at Outer IP header + * - mac_header should point at protocol/nexthdr of the outer IP + */ +static int iptfs_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ + if (x->outer_mode.family == AF_INET) + return iptfs_encap_add_ipv4(x, skb); + if (x->outer_mode.family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + return iptfs_encap_add_ipv6(x, skb); +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif + } + WARN_ON_ONCE(1); + return -EOPNOTSUPP; +} + /* ========================== */ /* State Management Functions */ /* ========================== */ @@ -66,6 +560,9 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, struct xfrm_iptfs_config *xc; xc = &xtfs->cfg; + xc->max_queue_size = net->xfrm.sysctl_iptfs_max_qsize; + xtfs->init_delay_ns = + (u64)net->xfrm.sysctl_iptfs_init_delay * NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_PKT_SIZE]) { xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); @@ -79,6 +576,15 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, return -EINVAL; } } + if (attrs[XFRMA_IPTFS_MAX_QSIZE]) + xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]); + if (attrs[XFRMA_IPTFS_INIT_DELAY]) + xtfs->init_delay_ns = + (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * + NSECS_IN_USEC; + + xtfs->ecn_queue_size = (u64)xc->max_queue_size * 95 / 100; + return 0; } @@ -88,8 +594,11 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) struct xfrm_iptfs_config *xc = &xtfs->cfg; unsigned int l = 0; - if (x->dir == XFRM_SA_DIR_OUT) + if (x->dir == XFRM_SA_DIR_OUT) { + l += nla_total_size(sizeof(u32)); /* init delay usec */ + l += nla_total_size(sizeof(xc->max_queue_size)); l += nla_total_size(sizeof(xc->pkt_size)); + } return l; } @@ -98,10 +607,23 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) { struct xfrm_iptfs_data *xtfs = x->mode_data; struct xfrm_iptfs_config *xc = &xtfs->cfg; - int ret = 0; + int ret; + u64 q; + + if (x->dir == XFRM_SA_DIR_OUT) { + q = xtfs->init_delay_ns; + (void)do_div(q, NSECS_IN_USEC); + ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); + if (ret) + return ret; + + ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, + xc->max_queue_size); + if (ret) + return ret; - if (x->dir == XFRM_SA_DIR_OUT) ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); + } return ret; } @@ -109,6 +631,10 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) static int __iptfs_init_state(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs) { + __skb_queue_head_init(&xtfs->queue); + hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); + xtfs->iptfs_timer.function = iptfs_delay_timer; + /* Modify type (esp) adjustment values */ if (x->props.family == AF_INET) @@ -166,6 +692,7 @@ static void iptfs_delete_state(struct xfrm_state *x) if (!xtfs) return; + hrtimer_cancel(&xtfs->iptfs_timer); kfree_sensitive(xtfs); module_put(x->mode_cbs->owner); @@ -180,6 +707,8 @@ static const struct xfrm_mode_cbs iptfs_mode_cbs = { .sa_len = iptfs_sa_len, .clone = iptfs_clone, .get_inner_mtu = iptfs_get_inner_mtu, + .output = iptfs_output_collect, + .prepare_output = iptfs_prepare_output, }; static int __init xfrm_iptfs_init(void)