@@ -1703,7 +1703,6 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (retried) {
__ip_set_get_netlink(set);
nfnl_unlock(NFNL_SUBSYS_IPSET);
- cond_resched();
nfnl_lock(NFNL_SUBSYS_IPSET);
__ip_set_put_netlink(set);
}
@@ -622,7 +622,6 @@ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs)
goto unlock;
}
mutex_unlock(&__ip_vs_mutex);
- cond_resched();
}
unlock:
@@ -681,7 +680,6 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max)
if (!ipvs->enable || kthread_should_stop())
goto stop;
- cond_resched();
diff = ktime_to_ns(ktime_sub(t2, t1));
if (diff <= 1 * NSEC_PER_USEC) {
@@ -815,7 +813,6 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs)
* and deleted (releasing kthread contexts)
*/
mutex_unlock(&__ip_vs_mutex);
- cond_resched();
mutex_lock(&__ip_vs_mutex);
/* Current kt released ? */
@@ -473,8 +473,6 @@ static void tree_gc_worker(struct work_struct *work)
rcu_read_unlock();
local_bh_enable();
- cond_resched();
-
spin_lock_bh(&nf_conncount_locks[tree]);
if (gc_count < ARRAY_SIZE(gc_nodes))
goto next; /* do not bother */
@@ -1563,7 +1563,6 @@ static void gc_worker(struct work_struct *work)
* we will just continue with next hash slot.
*/
rcu_read_unlock();
- cond_resched();
i++;
delta_time = nfct_time_stamp - end_time;
@@ -2393,7 +2392,6 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
}
spin_unlock(lockp);
local_bh_enable();
- cond_resched();
}
return NULL;
@@ -2418,7 +2416,6 @@ static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
nf_ct_delete(ct, iter_data->portid, iter_data->report);
nf_ct_put(ct);
- cond_resched();
}
mutex_unlock(&nf_conntrack_mutex);
}
@@ -84,7 +84,6 @@ static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
if (sent++ > 16) {
spin_unlock_bh(&cnet->ecache.dying_lock);
- cond_resched();
goto next;
}
}
@@ -96,8 +95,6 @@ static enum retry_state ecache_work_evict_list(struct nf_conntrack_net *cnet)
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
nf_ct_put(ct);
-
- cond_resched();
}
return ret;
@@ -3742,8 +3742,6 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
-
- cond_resched();
}
return 0;
@@ -495,8 +495,6 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (fatal_signal_pending(current))
return -EINTR;
- cond_resched();
-
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
err = __nft_rbtree_insert(net, set, rbe, ext);
@@ -1433,8 +1433,7 @@ xt_replace_table(struct xt_table *table,
if (seq & 1) {
do {
- cond_resched();
- cpu_relax();
+ cond_resched_stall();
} while (seq == raw_read_seqcount(s));
}
}
@@ -372,7 +372,6 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, bool select
dsthash_free(ht, dh);
}
spin_unlock_bh(&ht->lock);
- cond_resched();
}
}
There are broadly three sets of uses of cond_resched():

1. Calls to cond_resched() out of the goodness of our heart, otherwise
   known as avoiding lockup splats.

2. Open coded variants of cond_resched_lock() which call
   cond_resched().

3. Retry or error handling loops, where cond_resched() is used as a
   quick alternative to spinning in a tight-loop.

When running under a full preemption model, the cond_resched() reduces
to a NOP (not even a barrier) so removing it obviously cannot matter.

But considering only voluntary preemption models (for, say, code that
has been mostly tested under those), for set-1 and set-2 the scheduler
can now preempt kernel tasks running beyond their time quanta anywhere
they are preemptible() [1], which removes any need for these explicitly
placed scheduling points.

The cond_resched() calls in set-3 are a little more difficult. To start
with, given its NOP character under full preemption, it never actually
saved us from a tight loop. With voluntary preemption, it's not a NOP,
but it might as well be -- for most workloads the scheduler does not
have an interminable supply of runnable tasks on the runqueue.

So, cond_resched() is useful for avoiding softlockup splats, but it is
not terribly good for error handling. Ideally, these should be replaced
with some kind of timed or event wait. For now we use
cond_resched_stall(), which tries to schedule if possible, and executes
a cpu_relax() if not.

Most of the uses here are in set-1 (some right after we give up a lock
or enable bottom-halves, which causes an explicit preemption check). We
can remove all of them.

There's one case where we do "cond_resched(); cpu_relax()" while
spinning on a seqcount. Replace it with cond_resched_stall().

[1] https://lore.kernel.org/lkml/20231107215742.363031-1-ankur.a.arora@oracle.com/

Cc: Florian Westphal <fw@strlen.de>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Simon Horman <horms@verge.net.au>
Cc: Julian Anastasov <ja@ssi.bg>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 net/netfilter/ipset/ip_set_core.c   | 1 -
 net/netfilter/ipvs/ip_vs_est.c      | 3 ---
 net/netfilter/nf_conncount.c        | 2 --
 net/netfilter/nf_conntrack_core.c   | 3 ---
 net/netfilter/nf_conntrack_ecache.c | 3 ---
 net/netfilter/nf_tables_api.c       | 2 --
 net/netfilter/nft_set_rbtree.c      | 2 --
 net/netfilter/x_tables.c            | 3 +--
 net/netfilter/xt_hashlimit.c        | 1 -
 9 files changed, 1 insertion(+), 19 deletions(-)
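
A couple of illustrative asides below the diffstat (not meant for the
commit message). The first is a small userspace C analogue of the
set-1/set-2 shape being deleted above: a long scan that drops its lock
between buckets. pthread_mutex_t, sched_yield() and the bucket array
are stand-ins made up for this sketch for the kernel mutex,
cond_resched() and the per-bucket GC work; none of this is code from
the files touched here.

/*
 * Userspace sketch of the "drop the lock, let others run, retake it"
 * pattern.  sched_yield() plays the role of cond_resched(): with a
 * preemption model that can reschedule the task anywhere it is
 * preemptible, the unlock itself is already a natural preemption
 * point, so the explicit call adds nothing.
 */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NBUCKETS 1024

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static int table[NBUCKETS];

static void scan_table(void)
{
	for (int i = 0; i < NBUCKETS; i++) {
		pthread_mutex_lock(&table_lock);
		table[i]++;		/* stand-in for per-bucket GC work */
		pthread_mutex_unlock(&table_lock);

		sched_yield();		/* the explicit scheduling point being removed */
	}
}

int main(void)
{
	scan_table();
	printf("scanned %d buckets\n", NBUCKETS);
	return 0;
}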
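
The second aside sketches the set-3 case from the xt_replace_table()
hunk: a writer spinning until a concurrent reader leaves its
sequence-counter read section. The C11 atomics, the reader thread, the
usleep() timing and the use of sched_yield() in place of
cond_resched_stall() are all assumptions made for the sketch, not
kernel APIs; it only mirrors the wait-loop structure.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_uint seq;	/* even: no reader active, odd: reader in its read section */

static void *reader(void *arg)
{
	(void)arg;
	atomic_fetch_add(&seq, 1);	/* enter read section (seq becomes odd) */
	usleep(1000);			/* pretend to read for a while */
	atomic_fetch_add(&seq, 1);	/* leave read section (seq becomes even) */
	return NULL;
}

int main(void)
{
	pthread_t tid;
	unsigned int s;

	pthread_create(&tid, NULL, reader, NULL);
	usleep(100);			/* give the reader time to enter its section */

	s = atomic_load(&seq);
	if (s & 1) {
		/*
		 * Reader in progress: be polite while waiting for it to
		 * bump the counter again, rather than burning the CPU in
		 * a completely tight loop.
		 */
		do {
			sched_yield();
		} while (atomic_load(&seq) == s);
	}

	pthread_join(tid, NULL);
	printf("reader finished, seq=%u\n", atomic_load(&seq));
	return 0;
}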