Message ID | 20250312082250.1803501-5-edumazet@google.com (mailing list archive) |
---|---|
State | New |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | inet: frags: fully use RCU | expand |
On 3/12/2025 1:22 AM, Eric Dumazet wrote: > As mentioned in commit 648700f76b03 ("inet: frags: > use rhashtables for reassembly units"): > > A followup patch will even remove the refcount hold/release > left from prior implementation and save a couple of atomic > operations. > > This patch implements this idea, seven years later. > Easy for things to slip through the cracks and get forgotten. Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> > Signed-off-by: Eric Dumazet <edumazet@google.com> > --- > net/ieee802154/6lowpan/reassembly.c | 5 ++++- > net/ipv4/inet_fragment.c | 18 ++++++++---------- > net/ipv4/ip_fragment.c | 5 ++++- > net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++++- > net/ipv6/reassembly.c | 9 ++++----- > 5 files changed, 24 insertions(+), 18 deletions(-) > > diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c > index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644 > --- a/net/ieee802154/6lowpan/reassembly.c > +++ b/net/ieee802154/6lowpan/reassembly.c > @@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) > goto err; > } > > + rcu_read_lock(); > fq = fq_find(net, cb, &hdr.source, &hdr.dest); > if (fq != NULL) { > - int ret, refs = 1; > + int ret, refs = 0; > > spin_lock(&fq->q.lock); > ret = lowpan_frag_queue(fq, skb, frag_type, &refs); > spin_unlock(&fq->q.lock); > > + rcu_read_unlock(); > inet_frag_putn(&fq->q, refs); > return ret; > } > + rcu_read_unlock(); > > err: > kfree_skb(skb); > diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c > index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644 > --- a/net/ipv4/inet_fragment.c > +++ b/net/ipv4/inet_fragment.c > @@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, > > timer_setup(&q->timer, f->frag_expire, 0); > spin_lock_init(&q->lock); > - refcount_set(&q->refcnt, 3); > + /* One reference for the timer, one for the hash table. */ > + refcount_set(&q->refcnt, 2); > > return q; > } > @@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, > *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, > &q->node, f->rhash_params); > if (*prev) { > - int refs = 2; > + /* We could not insert in the hash table, > + * we need to cancel what inet_frag_alloc() > + * anticipated. > + */ > + int refs = 1; > > q->flags |= INET_FRAG_COMPLETE; > inet_frag_kill(q, &refs); > @@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, > return q; > } > > -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ > struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) > { > /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ > @@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) > if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) > return NULL; > > - rcu_read_lock(); > - > prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); > if (!prev) > fq = inet_frag_create(fqdir, key, &prev); > - if (!IS_ERR_OR_NULL(prev)) { > + if (!IS_ERR_OR_NULL(prev)) > fq = prev; > - if (!refcount_inc_not_zero(&fq->refcnt)) > - fq = NULL; > - } > - rcu_read_unlock(); > return fq; > } > EXPORT_SYMBOL(inet_frag_find); > diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c > index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644 > --- a/net/ipv4/ip_fragment.c > +++ b/net/ipv4/ip_fragment.c > @@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) > __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); > > /* Lookup (or create) queue header */ > + rcu_read_lock(); > qp = ip_find(net, ip_hdr(skb), user, vif); > if (qp) { > - int ret, refs = 1; > + int ret, refs = 0; > > spin_lock(&qp->q.lock); > > ret = ip_frag_queue(qp, skb, &refs); > > spin_unlock(&qp->q.lock); > + rcu_read_unlock(); > inet_frag_putn(&qp->q, refs); > return ret; > } > + rcu_read_unlock(); > > __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); > kfree_skb(skb); > diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c > index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644 > --- a/net/ipv6/netfilter/nf_conntrack_reasm.c > +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c > @@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) > struct frag_hdr *fhdr; > struct frag_queue *fq; > struct ipv6hdr *hdr; > - int refs = 1; > + int refs = 0; > u8 prevhdr; > > /* Jumbo payload inhibits frag. header */ > @@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) > hdr = ipv6_hdr(skb); > fhdr = (struct frag_hdr *)skb_transport_header(skb); > > + rcu_read_lock(); > fq = fq_find(net, fhdr->identification, user, hdr, > skb->dev ? skb->dev->ifindex : 0); > if (fq == NULL) { > + rcu_read_unlock(); > pr_debug("Can't find and can't create new queue\n"); > return -ENOMEM; > } > @@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) > } > > spin_unlock_bh(&fq->q.lock); > + rcu_read_unlock(); > inet_frag_putn(&fq->q, refs); > return ret; > } > diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c > index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644 > --- a/net/ipv6/reassembly.c > +++ b/net/ipv6/reassembly.c > @@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, > skb_postpush_rcsum(skb, skb_network_header(skb), > skb_network_header_len(skb)); > > - rcu_read_lock(); > __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); > - rcu_read_unlock(); > fq->q.rb_fragments = RB_ROOT; > fq->q.fragments_tail = NULL; > fq->q.last_run_head = NULL; > @@ -319,9 +317,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, > out_oom: > net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); > out_fail: > - rcu_read_lock(); > __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); > - rcu_read_unlock(); > inet_frag_kill(&fq->q, refs); > return -1; > } > @@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) > } > > iif = skb->dev ? skb->dev->ifindex : 0; > + rcu_read_lock(); > fq = fq_find(net, fhdr->identification, hdr, iif); > if (fq) { > u32 prob_offset = 0; > - int ret, refs = 1; > + int ret, refs = 0; > > spin_lock(&fq->q.lock); > > @@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) > &prob_offset, &refs); > > spin_unlock(&fq->q.lock); > + rcu_read_unlock(); > inet_frag_putn(&fq->q, refs); > if (prob_offset) { > __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), > @@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) > } > return ret; > } > + rcu_read_unlock(); > > __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); > kfree_skb(skb);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) goto err; } + rcu_read_lock(); fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { - int ret, refs = 1; + int ret, refs = 0; spin_lock(&fq->q.lock); ret = lowpan_frag_queue(fq, skb, frag_type, &refs); spin_unlock(&fq->q.lock); + rcu_read_unlock(); inet_frag_putn(&fq->q, refs); return ret; } + rcu_read_unlock(); err: kfree_skb(skb); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); - refcount_set(&q->refcnt, 3); + /* One reference for the timer, one for the hash table. */ + refcount_set(&q->refcnt, 2); return q; } @@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); if (*prev) { - int refs = 2; + /* We could not insert in the hash table, + * we need to cancel what inet_frag_alloc() + * anticipated. + */ + int refs = 1; q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q, &refs); @@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, return q; } -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ @@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; - rcu_read_lock(); - prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) fq = inet_frag_create(fqdir, key, &prev); - if (!IS_ERR_OR_NULL(prev)) { + if (!IS_ERR_OR_NULL(prev)) fq = prev; - if (!refcount_inc_not_zero(&fq->refcnt)) - fq = NULL; - } - rcu_read_unlock(); return fq; } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); /* Lookup (or create) queue header */ + rcu_read_lock(); qp = ip_find(net, ip_hdr(skb), user, vif); if (qp) { - int ret, refs = 1; + int ret, refs = 0; spin_lock(&qp->q.lock); ret = ip_frag_queue(qp, skb, &refs); spin_unlock(&qp->q.lock); + rcu_read_unlock(); inet_frag_putn(&qp->q, refs); return ret; } + rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); kfree_skb(skb); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; - int refs = 1; + int refs = 0; u8 prevhdr; /* Jumbo payload inhibits frag. header */ @@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); + rcu_read_lock(); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { + rcu_read_unlock(); pr_debug("Can't find and can't create new queue\n"); return -ENOMEM; } @@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) } spin_unlock_bh(&fq->q.lock); + rcu_read_unlock(); inet_frag_putn(&fq->q, refs); return ret; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, skb_postpush_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS); - rcu_read_unlock(); fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; @@ -319,9 +317,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, out_oom: net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: - rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS); - rcu_read_unlock(); inet_frag_kill(&fq->q, refs); return -1; } @@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } iif = skb->dev ? skb->dev->ifindex : 0; + rcu_read_lock(); fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { u32 prob_offset = 0; - int ret, refs = 1; + int ret, refs = 0; spin_lock(&fq->q.lock); @@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) &prob_offset, &refs); spin_unlock(&fq->q.lock); + rcu_read_unlock(); inet_frag_putn(&fq->q, refs); if (prob_offset) { __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev), @@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } return ret; } + rcu_read_unlock(); __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb);
As mentioned in commit 648700f76b03 ("inet: frags: use rhashtables for reassembly units"): A followup patch will even remove the refcount hold/release left from prior implementation and save a couple of atomic operations. This patch implements this idea, seven years later. Signed-off-by: Eric Dumazet <edumazet@google.com> --- net/ieee802154/6lowpan/reassembly.c | 5 ++++- net/ipv4/inet_fragment.c | 18 ++++++++---------- net/ipv4/ip_fragment.c | 5 ++++- net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++++- net/ipv6/reassembly.c | 9 ++++----- 5 files changed, 24 insertions(+), 18 deletions(-)