[ipsec-next,v9,13/17] xfrm: iptfs: handle received fragmented inner packets

Message ID	20240807211331.1081038-14-chopps@chopps.org (mailing list archive)
State	Awaiting Upstream
Delegated to:	Netdev Maintainers
Headers	show Received: from smtp.chopps.org (smtp.chopps.org [54.88.81.56]) by smtp.subspace.kernel.org (Postfix) with ESMTP id 7ADEB155A5D for <netdev@vger.kernel.org>; Wed, 7 Aug 2024 21:14:24 +0000 (UTC) From: Christian Hopps <chopps@chopps.org> To: devel@linux-ipsec.org Cc: Steffen Klassert <steffen.klassert@secunet.com>, netdev@vger.kernel.org, Christian Hopps <chopps@chopps.org>, Christian Hopps <chopps@labn.net> Subject: [PATCH ipsec-next v9 13/17] xfrm: iptfs: handle received fragmented inner packets Date: Wed, 7 Aug 2024 17:13:27 -0400 Message-ID: <20240807211331.1081038-14-chopps@chopps.org> In-Reply-To: <20240807211331.1081038-1-chopps@chopps.org> References: <20240807211331.1081038-1-chopps@chopps.org> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	Add IP-TFS mode to xfrm \| expand [ipsec-next,v9,00/17] Add IP-TFS mode to xfrm [ipsec-next,v9,01/17] net: refactor common skb header copy code for re-use [ipsec-next,v9,02/17] xfrm: config: add CONFIG_XFRM_IPTFS [ipsec-next,v9,03/17] include: uapi: add ip_tfs_*_hdr packet formats [ipsec-next,v9,04/17] include: uapi: add IPPROTO_AGGFRAG for AGGFRAG in ESP [ipsec-next,v9,05/17] xfrm: netlink: add config (netlink) options [ipsec-next,v9,06/17] xfrm: add mode_cbs module functionality [ipsec-next,v9,07/17] xfrm: add generic iptfs defines and functionality [ipsec-next,v9,08/17] xfrm: iptfs: add new iptfs xfrm mode impl [ipsec-next,v9,09/17] xfrm: iptfs: add user packet (tunnel ingress) handling [ipsec-next,v9,10/17] xfrm: iptfs: share page fragments of inner packets [ipsec-next,v9,11/17] xfrm: iptfs: add fragmenting of larger than MTU user packets [ipsec-next,v9,12/17] xfrm: iptfs: add basic receive packet (tunnel egress) handling [ipsec-next,v9,13/17] xfrm: iptfs: handle received fragmented inner packets [ipsec-next,v9,14/17] xfrm: iptfs: add reusing received skb for the tunnel egress packet [ipsec-next,v9,15/17] xfrm: iptfs: add skb-fragment sharing code [ipsec-next,v9,16/17] xfrm: iptfs: handle reordering of received packets [ipsec-next,v9,17/17] xfrm: iptfs: add tracepoint functionality

Context	Check	Description
netdev/series_format	fail	Series longer than 15 patches
netdev/tree_selection	success	Guessed tree name to be net-next
netdev/ynl	success	Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present	success	Fixes tag not required for -next series
netdev/header_inline	success	No static functions without inline keyword in header files
netdev/build_32bit	fail	Errors and warnings before: 34 this patch: 35
netdev/build_tools	success	No tools touched, skip
netdev/cc_maintainers	warning	4 maintainers not CCed: herbert@gondor.apana.org.au edumazet@google.com pabeni@redhat.com kuba@kernel.org
netdev/build_clang	fail	Errors and warnings before: 34 this patch: 36
netdev/verify_signedoff	success	Signed-off-by tag matches author and committer
netdev/deprecated_api	success	None detected
netdev/check_selftest	success	No net selftest shell script
netdev/verify_fixes	success	No Fixes tag
netdev/build_allmodconfig_warn	fail	Errors and warnings before: 34 this patch: 35
netdev/checkpatch	warning	WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_clang_rust	success	No Rust files in patch. Skipping build
netdev/kdoc	fail	Errors and warnings before: 1 this patch: 2
netdev/source_inline	success	Was 0 now: 0

diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 00f9e02dfda6..a4f30dc20e1e 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -24,6 +24,21 @@ #define IPTFS_SUBTYPE_BASIC 0 #define IPTFS_SUBTYPE_CC 1 +/* ----------------------------------------------- */ +/* IP-TFS default SA values (tunnel egress/dir-in) */ +/* ----------------------------------------------- */ + +/** + * define IPTFS_DEFAULT_DROP_TIME_USECS - default drop time + * + * The default IPTFS drop time in microseconds. The drop time is the amount of + * time before a missing out-of-order IPTFS tunnel packet is considered lost. + * See also the reorder window. + * + * Default 1s. + */ +#define IPTFS_DEFAULT_DROP_TIME_USECS 1000000 + /* ------------------------------------------------ */ /* IPTFS default SA values (tunnel ingress/dir-out) */ /* ------------------------------------------------ */ @@ -95,6 +110,13 @@ struct xfrm_iptfs_config { * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet. * @iptfs_timer: output timer. * @payload_mtu: max payload size. + * @drop_lock: lock to protect reorder queue. + * @drop_timer: timer for considering next packet lost. + * @drop_time_ns: timer intervan in nanoseconds. + * @ra_newskb: new pkt being reassembled. + * @ra_wantseq: expected next sequence for reassembly. + * @ra_runt: last pkt bytes from very end of last skb. + * @ra_runtlen: size of ra_runt. */ struct xfrm_iptfs_data { struct xfrm_iptfs_config cfg; @@ -108,10 +130,33 @@ struct xfrm_iptfs_data { u64 init_delay_ns; /* nanoseconds */ struct hrtimer iptfs_timer; /* output timer */ u32 payload_mtu; /* max payload size */ + + /* Tunnel egress */ + spinlock_t drop_lock; + struct hrtimer drop_timer; + u64 drop_time_ns; + + /* Tunnel egress reassembly */ + struct sk_buff *ra_newskb; /* new pkt being reassembled */ + u64 ra_wantseq; /* expected next sequence */ + u8 ra_runt[6]; /* last pkt bytes from last skb */ + u8 ra_runtlen; /* count of ra_runt */ }; static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); +static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me); + +/* ================= */ +/* Utility Functions */ +/* ================= */ + +static u64 __esp_seq(struct sk_buff *skb) +{ + u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low); + + return seq | (u64)ntohl(XFRM_SKB_CB(skb)->seq.input.hi) << 32; +} /* ================= */ /* SK_BUFF Functions */ @@ -249,6 +294,67 @@ iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len) return skb; } +/** + * iptfs_input_save_runt() - save data in xtfs runt space. + * @xtfs: xtfs state + * @seq: the current sequence + * @buf: packet data + * @len: length of packet data + * + * Save the small (`len`) start of a fragmented packet in `buf` in the xtfs data + * runt space. + */ +static void iptfs_input_save_runt(struct xfrm_iptfs_data *xtfs, u64 seq, + u8 *buf, int len) +{ + WARN_ON_ONCE(xtfs->ra_newskb); /* we won't have a new SKB yet */ + + memcpy(xtfs->ra_runt, buf, len); + + xtfs->ra_runtlen = len; + xtfs->ra_wantseq = seq + 1; +} + +/** + * __iptfs_iphlen() - return the v4/v6 header length using packet data. + * @data: pointer at octet with version nibble + * + * The version data is expected to be valid (i.e., either 4 or 6). + * + * Return: the IP header size based on the IP version. + */ +static u32 __iptfs_iphlen(u8 *data) +{ + struct iphdr *iph = (struct iphdr *)data; + + if (iph->version == 0x4) + return sizeof(*iph); + WARN_ON_ONCE(iph->version != 0x6); + return sizeof(struct ipv6hdr); +} + +/** + * __iptfs_iplen() - return the v4/v6 length using packet data. + * @data: pointer to ip (v4/v6) packet header + * + * Grab the IPv4 or IPv6 length value in the start of the inner packet header + * pointed to by `data`. Assumes data len is enough for the length field only. + * + * The version data is expected to be valid (i.e., either 4 or 6). + * + * Return: the length value. + */ +static u32 __iptfs_iplen(u8 *data) +{ + struct iphdr *iph = (struct iphdr *)data; + + if (iph->version == 0x4) + return ntohs(iph->tot_len); + WARN_ON_ONCE(iph->version != 0x6); + return ntohs(((struct ipv6hdr *)iph)->payload_len) + + sizeof(struct ipv6hdr); +} + /** * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv. * @x: xfrm state @@ -298,6 +404,239 @@ static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb) } } +static void __iptfs_reassem_done(struct xfrm_iptfs_data *xtfs, bool free) +{ + assert_spin_locked(&xtfs->drop_lock); + + /* We don't care if it works locking takes care of things */ + hrtimer_try_to_cancel(&xtfs->drop_timer); + if (free) + kfree_skb(xtfs->ra_newskb); + xtfs->ra_newskb = NULL; +} + +/** + * iptfs_reassem_abort() - In-progress packet is aborted free the state. + * @xtfs: xtfs state + */ +static void iptfs_reassem_abort(struct xfrm_iptfs_data *xtfs) +{ + __iptfs_reassem_done(xtfs, true); +} + +/** + * iptfs_reassem_done() - In-progress packet is complete, clear the state. + * @xtfs: xtfs state + */ +static void iptfs_reassem_done(struct xfrm_iptfs_data *xtfs) +{ + __iptfs_reassem_done(xtfs, false); +} + +/** + * iptfs_reassem_cont() - Continue the reassembly of an inner packets. + * @xtfs: xtfs state + * @seq: sequence of current packet + * @st: seq read stat for current packet + * @skb: current packet + * @data: offset into sequential packet data + * @blkoff: packet blkoff value + * @list: list of skbs to enqueue completed packet on + * + * Process an IPTFS payload that has a non-zero `blkoff` or when we are + * expecting the continuation b/c we have a runt or in-progress packet. + * + * Return: the new data offset to continue processing from. + */ +static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, + struct skb_seq_state *st, struct sk_buff *skb, + u32 data, u32 blkoff, struct list_head *list) +{ + struct sk_buff *newskb = xtfs->ra_newskb; + u32 remaining = skb->len - data; + u32 runtlen = xtfs->ra_runtlen; + u32 copylen, fraglen, ipremain, iphlen, iphremain, rrem; + + /* Handle packet fragment we aren't expecting */ + if (!runtlen && !xtfs->ra_newskb) + return data + min(blkoff, remaining); + + /* Important to remember that input to this function is an ordered + * packet stream (unless the user disabled the reorder window). Thus if + * we are waiting for, and expecting the next packet so we can continue + * assembly, a newer sequence number indicates older ones are not coming + * (or if they do should be ignored). Technically we can receive older + * ones when the reorder window is disabled; however, the user should + * have disabled fragmentation in this case, and regardless we don't + * deal with it. + * + * blkoff could be zero if the stream is messed up (or it's an all pad + * insertion) be careful to handle that case in each of the below + */ + + /* Too old case: This can happen when the reorder window is disabled so + * ordering isn't actually guaranteed. + */ + if (seq < xtfs->ra_wantseq) + return data + remaining; + + /* Too new case: We missed what we wanted cleanup. */ + if (seq > xtfs->ra_wantseq) { + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + if (blkoff == 0) { + if ((*skb->data & 0xF0) != 0) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + /* Handle all pad case, advance expected sequence number. + * (RFC 9347 S2.2.3) + */ + xtfs->ra_wantseq++; + /* will end parsing */ + return data + remaining; + } + + if (runtlen) { + WARN_ON_ONCE(xtfs->ra_newskb); + + /* Regardless of what happens we're done with the runt */ + xtfs->ra_runtlen = 0; + + /* The start of this inner packet was at the very end of the last + * iptfs payload which didn't include enough for the ip header + * length field. We must have *at least* that now. + */ + rrem = sizeof(xtfs->ra_runt) - runtlen; + if (remaining < rrem || blkoff < rrem) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* fill in the runt data */ + if (skb_copy_bits_seq(st, data, &xtfs->ra_runt[runtlen], + rrem)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + + /* We have enough data to get the ip length value now, + * allocate an in progress skb + */ + ipremain = __iptfs_iplen(xtfs->ra_runt); + if (ipremain < sizeof(xtfs->ra_runt)) { + /* length has to be at least runtsize large */ + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* For the runt case we don't attempt sharing currently. NOTE: + * Currently, this IPTFS implementation will not create runts. + */ + + newskb = iptfs_alloc_skb(skb, ipremain, false); + if (!newskb) { + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINERROR); + goto abandon; + } + xtfs->ra_newskb = newskb; + + /* Copy the runt data into the buffer, but leave data + * pointers the same as normal non-runt case. The extra `rrem` + * recopied bytes are basically cacheline free. Allows using + * same logic below to complete. + */ + memcpy(skb_put(newskb, runtlen), xtfs->ra_runt, + sizeof(xtfs->ra_runt)); + } + + /* Continue reassembling the packet */ + ipremain = __iptfs_iplen(newskb->data); + iphlen = __iptfs_iphlen(newskb->data); + + /* Sanity check, we created the newskb knowing the IP length so the IP + * length can't now be shorter. + */ + WARN_ON_ONCE(newskb->len > ipremain); + + ipremain -= newskb->len; + if (blkoff < ipremain) { + /* Corrupt data, we don't have enough to complete the packet */ + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* We want the IP header in linear space */ + if (newskb->len < iphlen) { + iphremain = iphlen - newskb->len; + if (blkoff < iphremain) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + fraglen = min(blkoff, remaining); + copylen = min(fraglen, iphremain); + WARN_ON_ONCE(skb_tailroom(newskb) < copylen); + if (skb_copy_bits_seq(st, data, skb_put(newskb, copylen), + copylen)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* this is a silly condition that might occur anyway */ + if (copylen < iphremain) { + xtfs->ra_wantseq++; + return data + fraglen; + } + /* update data and things derived from it */ + data += copylen; + blkoff -= copylen; + remaining -= copylen; + ipremain -= copylen; + } + + fraglen = min(blkoff, remaining); + copylen = min(fraglen, ipremain); + + /* We verified this was true in the main receive routine */ + WARN_ON_ONCE(skb_tailroom(newskb) < copylen); + + /* copy fragment data into newskb */ + if (skb_copy_bits_seq(st, data, skb_put(newskb, copylen), copylen)) { + XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + + if (copylen < ipremain) { + xtfs->ra_wantseq++; + } else { + /* We are done with packet reassembly! */ + WARN_ON_ONCE(copylen != ipremain); + iptfs_reassem_done(xtfs); + iptfs_complete_inner_skb(xtfs->x, newskb); + list_add_tail(&newskb->list, list); + } + + /* will continue on to new data block or end */ + return data + fraglen; + +abandon: + if (xtfs->ra_newskb) { + iptfs_reassem_abort(xtfs); + } else { + xtfs->ra_runtlen = 0; + xtfs->ra_wantseq = 0; + } + /* skip past fragment, maybe to end */ + return data + min(blkoff, remaining); +} + /** * iptfs_input() - handle receipt of iptfs payload * @x: xfrm state @@ -315,15 +654,20 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) struct list_head sublist; /* rename this it's just a list */ struct sk_buff *first_skb, *next; const unsigned char *old_mac; + struct xfrm_iptfs_data *xtfs; struct ip_iptfs_hdr *ipth; struct iphdr *iph; struct net *net; u32 remaining, iplen, iphlen, data, tail; - u32 blkoff; + u32 blkoff, capturelen; + u64 seq; + xtfs = x->mode_data; net = xs_net(x); first_skb = NULL; + seq = __esp_seq(skb); + /* Large enough to hold both types of header */ ipth = (struct ip_iptfs_hdr *)&iptcch; @@ -361,12 +705,27 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) INIT_LIST_HEAD(&sublist); - /* Fragment handling in following commits */ + /* Handle fragment at start of payload, and/or waiting reassembly. */ + blkoff = ntohs(ipth->block_offset); - data += blkoff; + /* check before locking i.e., maybe */ + if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) { + spin_lock(&xtfs->drop_lock); + + /* check again after lock */ + if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) { + data = iptfs_reassem_cont(xtfs, seq, &skbseq, skb, data, + blkoff, &sublist); + } + + spin_unlock(&xtfs->drop_lock); + } /* New packets */ + tail = skb->len; + WARN_ON_ONCE(xtfs->ra_newskb && data < tail); + while (data < tail) { __be16 protocol = 0; @@ -385,8 +744,13 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) iph = (struct iphdr *)hbytes; if (iph->version == 0x4) { /* must have at least tot_len field present */ - if (remaining < 4) + if (remaining < 4) { + /* save the bytes we have, advance data and exit */ + iptfs_input_save_runt(xtfs, seq, hbytes, + remaining); + data += remaining; break; + } iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; @@ -394,8 +758,13 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) XFRM_MODE_SKB_CB(skbseq.root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { /* must have at least payload_len field present */ - if (remaining < 6) + if (remaining < 6) { + /* save the bytes we have, advance data and exit */ + iptfs_input_save_runt(xtfs, seq, hbytes, + remaining); + data += remaining; break; + } iplen = be16_to_cpu(((struct ipv6hdr *)hbytes)->payload_len); iplen += sizeof(struct ipv6hdr); @@ -405,6 +774,7 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) ipv6_get_dsfield((struct ipv6hdr *)iph); } else if (iph->version == 0x0) { /* pad */ + data = tail; break; } else { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); @@ -424,16 +794,14 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) if (!first_skb) first_skb = skb; - /* Fragment handling in following commits */ - if (iplen > remaining) - break; - - skb = iptfs_pskb_extract_seq(iplen, &skbseq, data, iplen); + capturelen = min(iplen, remaining); + skb = iptfs_pskb_extract_seq(iplen, &skbseq, data, capturelen); if (!skb) { /* skip to next packet or done */ - data += iplen; + data += capturelen; continue; } + WARN_ON_ONCE(skb->len != capturelen); skb->protocol = protocol; if (old_mac) { @@ -444,11 +812,38 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) eth_hdr(skb)->h_proto = skb->protocol; } - data += iplen; + data += capturelen; + + if (skb->len < iplen) { + WARN_ON_ONCE(data != tail); + WARN_ON_ONCE(xtfs->ra_newskb); + + /* Start reassembly */ + spin_lock(&xtfs->drop_lock); + + xtfs->ra_newskb = skb; + xtfs->ra_wantseq = seq + 1; + if (!hrtimer_is_queued(&xtfs->drop_timer)) { + /* softirq blocked lest the timer fire and interrupt us */ + WARN_ON_ONCE(!in_interrupt()); + hrtimer_start(&xtfs->drop_timer, + xtfs->drop_time_ns, + IPTFS_HRTIMER_MODE); + } + + spin_unlock(&xtfs->drop_lock); + + break; + } + iptfs_complete_inner_skb(x, skb); list_add_tail(&skb->list, &sublist); } + if (data != tail) + /* this should not happen from the above code */ + XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR); + /* Send the packets! */ list_for_each_entry_safe(skb, next, &sublist, list) { skb_list_del_init(skb); @@ -476,6 +871,47 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) return -EINPROGRESS; } +/** + * iptfs_drop_timer() - Handle drop timer expiry. + * @me: the timer + * + * This is similar to our input function. + * + * The drop timer is set when we start an in progress reassembly, and also when + * we save a future packet in the window saved array. + * + * NOTE packets in the save window are always newer WRT drop times as + * they get further in the future. i.e. for: + * + * if slots (S0, S1, ... Sn) and `Dn` is the drop time for slot `Sn`, + * then D(n-1) <= D(n). + * + * So, regardless of why the timer is firing we can always discard any inprogress + * fragment; either it's the reassembly timer, or slot 0 is going to be + * dropped as S0 must have the most recent drop time, and slot 0 holds the + * continuation fragment of the in progress packet. + * + * Returns HRTIMER_NORESTART. + */ +static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me) +{ + struct xfrm_iptfs_data *xtfs; + struct xfrm_state *x; + + xtfs = container_of(me, typeof(*xtfs), drop_timer); + x = xtfs->x; + + /* Drop any in progress packet */ + spin_lock(&xtfs->drop_lock); + if (xtfs->ra_newskb) { + kfree_skb(xtfs->ra_newskb); + xtfs->ra_newskb = NULL; + } + spin_unlock(&xtfs->drop_lock); + + return HRTIMER_NORESTART; +} + /* ================================= */ /* IPTFS Sending (ingress) Functions */ /* ================================= */ @@ -1249,6 +1685,7 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, xc = &xtfs->cfg; xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE; + xtfs->drop_time_ns = IPTFS_DEFAULT_DROP_TIME_USECS * NSECS_IN_USEC; xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_DONT_FRAG]) @@ -1267,6 +1704,10 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, } if (attrs[XFRMA_IPTFS_MAX_QSIZE]) xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]); + if (attrs[XFRMA_IPTFS_DROP_TIME]) + xtfs->drop_time_ns = + (u64)nla_get_u32(attrs[XFRMA_IPTFS_DROP_TIME]) * + NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_INIT_DELAY]) xtfs->init_delay_ns = (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * @@ -1285,7 +1726,9 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) struct xfrm_iptfs_config *xc = &xtfs->cfg; unsigned int l = 0; - if (x->dir == XFRM_SA_DIR_OUT) { + if (x->dir == XFRM_SA_DIR_IN) { + l += nla_total_size(sizeof(u32)); /* drop time usec */ + } else { if (xc->dont_frag) l += nla_total_size(0); /* dont-frag flag */ l += nla_total_size(sizeof(u32)); /* init delay usec */ @@ -1303,7 +1746,11 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) int ret = 0; u64 q; - if (x->dir == XFRM_SA_DIR_OUT) { + if (x->dir == XFRM_SA_DIR_IN) { + q = xtfs->drop_time_ns; + (void)do_div(q, NSECS_IN_USEC); + ret = nla_put_u32(skb, XFRMA_IPTFS_DROP_TIME, q); + } else { if (xc->dont_frag) { ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG); if (ret) @@ -1334,6 +1781,10 @@ static void __iptfs_init_state(struct xfrm_state *x, hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); xtfs->iptfs_timer.function = iptfs_delay_timer; + spin_lock_init(&xtfs->drop_lock); + hrtimer_init(&xtfs->drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); + xtfs->drop_timer.function = iptfs_drop_timer; + /* Modify type (esp) adjustment values */ if (x->props.family == AF_INET) @@ -1357,6 +1808,8 @@ static int iptfs_clone(struct xfrm_state *x, struct xfrm_state *orig) if (!xtfs) return -ENOMEM; + xtfs->ra_newskb = NULL; + __iptfs_init_state(x, xtfs); return 0; @@ -1393,6 +1846,13 @@ static void iptfs_delete_state(struct xfrm_state *x) while ((skb = __skb_dequeue(&list))) kfree_skb(skb); + spin_lock_bh(&xtfs->drop_lock); + hrtimer_cancel(&xtfs->drop_timer); + spin_unlock_bh(&xtfs->drop_lock); + + if (xtfs->ra_newskb) + kfree_skb(xtfs->ra_newskb); + kfree_sensitive(xtfs); module_put(x->mode_cbs->owner);

[ipsec-next,v9,13/17] xfrm: iptfs: handle received fragmented inner packets

Checks

Commit Message

Patch