diff mbox series

[v2,net-next,4/4] inet: frags: save a pair of atomic operations in reassembly

Message ID 20250312082250.1803501-5-edumazet@google.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series inet: frags: fully use RCU | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: coreteam@netfilter.org netfilter-devel@vger.kernel.org alex.aring@gmail.com linux-wpan@vger.kernel.org miquel.raynal@bootlin.com pablo@netfilter.org kadlec@netfilter.org stefan@datenfreihafen.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 159 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-03-13--00-00 (tests: 894)

Commit Message

Eric Dumazet March 12, 2025, 8:22 a.m. UTC
As mentioned in commit 648700f76b03 ("inet: frags:
use rhashtables for reassembly units"):

  A followup patch will even remove the refcount hold/release
  left from prior implementation and save a couple of atomic
  operations.

This patch implements this idea, seven years later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ieee802154/6lowpan/reassembly.c     |  5 ++++-
 net/ipv4/inet_fragment.c                | 18 ++++++++----------
 net/ipv4/ip_fragment.c                  |  5 ++++-
 net/ipv6/netfilter/nf_conntrack_reasm.c |  5 ++++-
 net/ipv6/reassembly.c                   |  9 ++++-----
 5 files changed, 24 insertions(+), 18 deletions(-)

Comments

Jacob Keller March 13, 2025, 10:53 p.m. UTC | #1
On 3/12/2025 1:22 AM, Eric Dumazet wrote:
> As mentioned in commit 648700f76b03 ("inet: frags:
> use rhashtables for reassembly units"):
> 
>   A followup patch will even remove the refcount hold/release
>   left from prior implementation and save a couple of atomic
>   operations.
> 
> This patch implements this idea, seven years later.
> 

Easy for things to slip through the cracks and get forgotten.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/ieee802154/6lowpan/reassembly.c     |  5 ++++-
>  net/ipv4/inet_fragment.c                | 18 ++++++++----------
>  net/ipv4/ip_fragment.c                  |  5 ++++-
>  net/ipv6/netfilter/nf_conntrack_reasm.c |  5 ++++-
>  net/ipv6/reassembly.c                   |  9 ++++-----
>  5 files changed, 24 insertions(+), 18 deletions(-)
> 
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
>  		goto err;
>  	}
>  
> +	rcu_read_lock();
>  	fq = fq_find(net, cb, &hdr.source, &hdr.dest);
>  	if (fq != NULL) {
> -		int ret, refs = 1;
> +		int ret, refs = 0;
>  
>  		spin_lock(&fq->q.lock);
>  		ret = lowpan_frag_queue(fq, skb, frag_type, &refs);
>  		spin_unlock(&fq->q.lock);
>  
> +		rcu_read_unlock();
>  		inet_frag_putn(&fq->q, refs);
>  		return ret;
>  	}
> +	rcu_read_unlock();
>  
>  err:
>  	kfree_skb(skb);
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
>  
>  	timer_setup(&q->timer, f->frag_expire, 0);
>  	spin_lock_init(&q->lock);
> -	refcount_set(&q->refcnt, 3);
> +	/* One reference for the timer, one for the hash table. */
> +	refcount_set(&q->refcnt, 2);
>  
>  	return q;
>  }
> @@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
>  	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
>  						 &q->node, f->rhash_params);
>  	if (*prev) {
> -		int refs = 2;
> +		/* We could not insert in the hash table,
> +		 * we need to cancel what inet_frag_alloc()
> +		 * anticipated.
> +		 */
> +		int refs = 1;
>  
>  		q->flags |= INET_FRAG_COMPLETE;
>  		inet_frag_kill(q, &refs);
> @@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
>  	return q;
>  }
>  
> -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
>  struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
>  {
>  	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
> @@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
>  	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
>  		return NULL;
>  
> -	rcu_read_lock();
> -
>  	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
>  	if (!prev)
>  		fq = inet_frag_create(fqdir, key, &prev);
> -	if (!IS_ERR_OR_NULL(prev)) {
> +	if (!IS_ERR_OR_NULL(prev))
>  		fq = prev;
> -		if (!refcount_inc_not_zero(&fq->refcnt))
> -			fq = NULL;
> -	}
> -	rcu_read_unlock();
>  	return fq;
>  }
>  EXPORT_SYMBOL(inet_frag_find);
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
>  	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
>  
>  	/* Lookup (or create) queue header */
> +	rcu_read_lock();
>  	qp = ip_find(net, ip_hdr(skb), user, vif);
>  	if (qp) {
> -		int ret, refs = 1;
> +		int ret, refs = 0;
>  
>  		spin_lock(&qp->q.lock);
>  
>  		ret = ip_frag_queue(qp, skb, &refs);
>  
>  		spin_unlock(&qp->q.lock);
> +		rcu_read_unlock();
>  		inet_frag_putn(&qp->q, refs);
>  		return ret;
>  	}
> +	rcu_read_unlock();
>  
>  	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
>  	kfree_skb(skb);
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
>  	struct frag_hdr *fhdr;
>  	struct frag_queue *fq;
>  	struct ipv6hdr *hdr;
> -	int refs = 1;
> +	int refs = 0;
>  	u8 prevhdr;
>  
>  	/* Jumbo payload inhibits frag. header */
> @@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
>  	hdr = ipv6_hdr(skb);
>  	fhdr = (struct frag_hdr *)skb_transport_header(skb);
>  
> +	rcu_read_lock();
>  	fq = fq_find(net, fhdr->identification, user, hdr,
>  		     skb->dev ? skb->dev->ifindex : 0);
>  	if (fq == NULL) {
> +		rcu_read_unlock();
>  		pr_debug("Can't find and can't create new queue\n");
>  		return -ENOMEM;
>  	}
> @@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
>  	}
>  
>  	spin_unlock_bh(&fq->q.lock);
> +	rcu_read_unlock();
>  	inet_frag_putn(&fq->q, refs);
>  	return ret;
>  }
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
>  	skb_postpush_rcsum(skb, skb_network_header(skb),
>  			   skb_network_header_len(skb));
>  
> -	rcu_read_lock();
>  	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
> -	rcu_read_unlock();
>  	fq->q.rb_fragments = RB_ROOT;
>  	fq->q.fragments_tail = NULL;
>  	fq->q.last_run_head = NULL;
> @@ -319,9 +317,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
>  out_oom:
>  	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
>  out_fail:
> -	rcu_read_lock();
>  	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
> -	rcu_read_unlock();
>  	inet_frag_kill(&fq->q, refs);
>  	return -1;
>  }
> @@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
>  	}
>  
>  	iif = skb->dev ? skb->dev->ifindex : 0;
> +	rcu_read_lock();
>  	fq = fq_find(net, fhdr->identification, hdr, iif);
>  	if (fq) {
>  		u32 prob_offset = 0;
> -		int ret, refs = 1;
> +		int ret, refs = 0;
>  
>  		spin_lock(&fq->q.lock);
>  
> @@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
>  				     &prob_offset, &refs);
>  
>  		spin_unlock(&fq->q.lock);
> +		rcu_read_unlock();
>  		inet_frag_putn(&fq->q, refs);
>  		if (prob_offset) {
>  			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
> @@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
>  		}
>  		return ret;
>  	}
> +	rcu_read_unlock();
>  
>  	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
>  	kfree_skb(skb);
diff mbox series

Patch

diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -304,17 +304,20 @@  int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
 		goto err;
 	}
 
+	rcu_read_lock();
 	fq = fq_find(net, cb, &hdr.source, &hdr.dest);
 	if (fq != NULL) {
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&fq->q.lock);
 		ret = lowpan_frag_queue(fq, skb, frag_type, &refs);
 		spin_unlock(&fq->q.lock);
 
+		rcu_read_unlock();
 		inet_frag_putn(&fq->q, refs);
 		return ret;
 	}
+	rcu_read_unlock();
 
 err:
 	kfree_skb(skb);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -327,7 +327,8 @@  static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
-	refcount_set(&q->refcnt, 3);
+	/* One reference for the timer, one for the hash table. */
+	refcount_set(&q->refcnt, 2);
 
 	return q;
 }
@@ -349,7 +350,11 @@  static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
 						 &q->node, f->rhash_params);
 	if (*prev) {
-		int refs = 2;
+		/* We could not insert in the hash table,
+		 * we need to cancel what inet_frag_alloc()
+		 * anticipated.
+		 */
+		int refs = 1;
 
 		q->flags |= INET_FRAG_COMPLETE;
 		inet_frag_kill(q, &refs);
@@ -359,7 +364,6 @@  static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 	return q;
 }
 
-/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
 	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
@@ -369,17 +373,11 @@  struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
 		return NULL;
 
-	rcu_read_lock();
-
 	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
 	if (!prev)
 		fq = inet_frag_create(fqdir, key, &prev);
-	if (!IS_ERR_OR_NULL(prev)) {
+	if (!IS_ERR_OR_NULL(prev))
 		fq = prev;
-		if (!refcount_inc_not_zero(&fq->refcnt))
-			fq = NULL;
-	}
-	rcu_read_unlock();
 	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -483,18 +483,21 @@  int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
 
 	/* Lookup (or create) queue header */
+	rcu_read_lock();
 	qp = ip_find(net, ip_hdr(skb), user, vif);
 	if (qp) {
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&qp->q.lock);
 
 		ret = ip_frag_queue(qp, skb, &refs);
 
 		spin_unlock(&qp->q.lock);
+		rcu_read_unlock();
 		inet_frag_putn(&qp->q, refs);
 		return ret;
 	}
+	rcu_read_unlock();
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -450,7 +450,7 @@  int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr;
-	int refs = 1;
+	int refs = 0;
 	u8 prevhdr;
 
 	/* Jumbo payload inhibits frag. header */
@@ -477,9 +477,11 @@  int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
+	rcu_read_lock();
 	fq = fq_find(net, fhdr->identification, user, hdr,
 		     skb->dev ? skb->dev->ifindex : 0);
 	if (fq == NULL) {
+		rcu_read_unlock();
 		pr_debug("Can't find and can't create new queue\n");
 		return -ENOMEM;
 	}
@@ -493,6 +495,7 @@  int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	}
 
 	spin_unlock_bh(&fq->q.lock);
+	rcu_read_unlock();
 	inet_frag_putn(&fq->q, refs);
 	return ret;
 }
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -305,9 +305,7 @@  static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 	skb_postpush_rcsum(skb, skb_network_header(skb),
 			   skb_network_header_len(skb));
 
-	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
-	rcu_read_unlock();
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	fq->q.last_run_head = NULL;
@@ -319,9 +317,7 @@  static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 out_oom:
 	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 out_fail:
-	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
-	rcu_read_unlock();
 	inet_frag_kill(&fq->q, refs);
 	return -1;
 }
@@ -379,10 +375,11 @@  static int ipv6_frag_rcv(struct sk_buff *skb)
 	}
 
 	iif = skb->dev ? skb->dev->ifindex : 0;
+	rcu_read_lock();
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
 		u32 prob_offset = 0;
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&fq->q.lock);
 
@@ -391,6 +388,7 @@  static int ipv6_frag_rcv(struct sk_buff *skb)
 				     &prob_offset, &refs);
 
 		spin_unlock(&fq->q.lock);
+		rcu_read_unlock();
 		inet_frag_putn(&fq->q, refs);
 		if (prob_offset) {
 			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
@@ -400,6 +398,7 @@  static int ipv6_frag_rcv(struct sk_buff *skb)
 		}
 		return ret;
 	}
+	rcu_read_unlock();
 
 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);