diff mbox series

[ipsec-next,3/3] xfrm: Add an inbound percpu state cache.

Message ID 20240412060553.3483630-4-steffen.klassert@secunet.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Add support for per cpu xfrm states. | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 5006 this patch: 12106
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 5 maintainers not CCed: pabeni@redhat.com kuba@kernel.org dsahern@kernel.org edumazet@google.com herbert@gondor.apana.org.au
netdev/build_clang fail Errors and warnings before: 124 this patch: 124
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4259 this patch: 10383
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Steffen Klassert April 12, 2024, 6:05 a.m. UTC
Now that we can have percpu xfrm states, the number of active
states might increase. To get better lookup performance,
we add a percpu cache for the inbound xfrm states in use.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/netns/xfrm.h |  1 +
 include/net/xfrm.h       |  5 ++++
 net/ipv4/esp4_offload.c  |  6 ++---
 net/ipv6/esp6_offload.c  |  6 ++---
 net/xfrm/xfrm_input.c    |  2 +-
 net/xfrm/xfrm_state.c    | 57 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 70 insertions(+), 7 deletions(-)

Comments

Yujie Liu April 16, 2024, 2:33 a.m. UTC | #1
Hi Steffen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on klassert-ipsec-next/master]
[also build test WARNING on klassert-ipsec/master net/main net-next/main linus/master v6.9-rc3 next-20240412]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Steffen-Klassert/xfrm-Add-support-for-per-cpu-xfrm-state-handling/20240412-140746
base:   https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master
patch link:    https://lore.kernel.org/r/20240412060553.3483630-4-steffen.klassert%40secunet.com
patch subject: [PATCH ipsec-next 3/3] xfrm: Add an inbound percpu state cache.
config: i386-randconfig-061-20240413 (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240413/202404130802.rDxN3ijD-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <yujie.liu@intel.com>
| Closes: https://lore.kernel.org/r/202404130802.rDxN3ijD-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
   drivers/net/ethernet/altera/altera_tse_ethtool.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_msgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_utils.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_sgdma.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/if_vlan.h, ...):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu
--
   drivers/net/ethernet/altera/altera_tse_main.c: note: in included file (through include/net/net_namespace.h, include/linux/netdevice.h, include/linux/etherdevice.h):
>> include/net/netns/xfrm.h:46:39: sparse: sparse: duplicate [noderef]
>> include/net/netns/xfrm.h:46:39: sparse: sparse: multiple address spaces given: __rcu & __percpu

vim +46 include/net/netns/xfrm.h

880a6fab8f6ba5b Christophe Gouault 2014-08-29  31  
d62ddc21b674b5a Alexey Dobriyan    2008-11-25  32  struct netns_xfrm {
9d4139c76905833 Alexey Dobriyan    2008-11-25  33  	struct list_head	state_all;
73d189dce486cd6 Alexey Dobriyan    2008-11-25  34  	/*
73d189dce486cd6 Alexey Dobriyan    2008-11-25  35  	 * Hash table to find appropriate SA towards given target (endpoint of
73d189dce486cd6 Alexey Dobriyan    2008-11-25  36  	 * tunnel or destination of transport mode) allowed by selector.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  37  	 *
73d189dce486cd6 Alexey Dobriyan    2008-11-25  38  	 * Main use is finding SA after policy selected tunnel or transport
73d189dce486cd6 Alexey Dobriyan    2008-11-25  39  	 * mode. Also, it can be used by ah/esp icmp error handler to find
73d189dce486cd6 Alexey Dobriyan    2008-11-25  40  	 * offending SA.
73d189dce486cd6 Alexey Dobriyan    2008-11-25  41  	 */
d737a5805581c6f Florian Westphal   2016-08-09  42  	struct hlist_head	__rcu *state_bydst;
d737a5805581c6f Florian Westphal   2016-08-09  43  	struct hlist_head	__rcu *state_bysrc;
d737a5805581c6f Florian Westphal   2016-08-09  44  	struct hlist_head	__rcu *state_byspi;
fe9f1d8779cb470 Sabrina Dubroca    2021-04-25  45  	struct hlist_head	__rcu *state_byseq;
042bf7320e286f6 Steffen Klassert   2024-04-12 @46  	struct hlist_head	__rcu __percpu *state_cache_input;
529983ecabeae3d Alexey Dobriyan    2008-11-25  47  	unsigned int		state_hmask;
0bf7c5b019518d3 Alexey Dobriyan    2008-11-25  48  	unsigned int		state_num;
630827338585022 Alexey Dobriyan    2008-11-25  49  	struct work_struct	state_hash_work;
50a30657fd7ee77 Alexey Dobriyan    2008-11-25  50  
adfcf0b27e87d16 Alexey Dobriyan    2008-11-25  51  	struct list_head	policy_all;
93b851c1c93c7d5 Alexey Dobriyan    2008-11-25  52  	struct hlist_head	*policy_byidx;
8100bea7d619e84 Alexey Dobriyan    2008-11-25  53  	unsigned int		policy_idx_hmask;
3e4bc23926b83c3 Eric Dumazet       2023-09-08  54  	unsigned int		idx_generator;
53c2e285f970300 Herbert Xu         2014-11-13  55  	struct hlist_head	policy_inexact[XFRM_POLICY_MAX];
53c2e285f970300 Herbert Xu         2014-11-13  56  	struct xfrm_policy_hash	policy_bydst[XFRM_POLICY_MAX];
dc2caba7b321289 Alexey Dobriyan    2008-11-25  57  	unsigned int		policy_count[XFRM_POLICY_MAX * 2];
66caf628c3b634c Alexey Dobriyan    2008-11-25  58  	struct work_struct	policy_hash_work;
880a6fab8f6ba5b Christophe Gouault 2014-08-29  59  	struct xfrm_policy_hthresh policy_hthresh;
24969facd704a5f Florian Westphal   2018-11-07  60  	struct list_head	inexact_bins;
a6483b790f8efcd Alexey Dobriyan    2008-11-25  61  
d7c7544c3d5f590 Alexey Dobriyan    2010-01-24  62  
a6483b790f8efcd Alexey Dobriyan    2008-11-25  63  	struct sock		*nlsk;
d79d792ef9f99cc Eric W. Biederman  2009-12-03  64  	struct sock		*nlsk_stash;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  65  
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  66  	u32			sysctl_aevent_etime;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  67  	u32			sysctl_aevent_rseqth;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  68  	int			sysctl_larval_drop;
b27aeadb5948d40 Alexey Dobriyan    2008-11-25  69  	u32			sysctl_acq_expires;
2d151d39073aff4 Steffen Klassert   2021-07-18  70  
b58b1f563ab7895 Nicolas Dichtel    2022-03-14  71  	u8			policy_default[XFRM_POLICY_MAX];
2d151d39073aff4 Steffen Klassert   2021-07-18  72
diff mbox series

Patch

diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 423b52eca908..177516be776d 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -43,6 +43,7 @@  struct netns_xfrm {
 	struct hlist_head	__rcu *state_bysrc;
 	struct hlist_head	__rcu *state_byspi;
 	struct hlist_head	__rcu *state_byseq;
+	struct hlist_head	__percpu *state_cache_input;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 49c85bcd9fd9..60c2b129e9e5 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -182,6 +182,7 @@  struct xfrm_state {
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
 	struct hlist_node	state_cache;
+	struct hlist_node	state_cache_input;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -1604,6 +1605,10 @@  int xfrm_state_update(struct xfrm_state *x);
 struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
 				     const xfrm_address_t *daddr, __be32 spi,
 				     u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family);
 struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 					    const xfrm_address_t *daddr,
 					    const xfrm_address_t *saddr,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index b3271957ad9a..6ccb8d56ad2a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@  static struct sk_buff *esp4_gro_receive(struct list_head *head,
 		if (sp->len == XFRM_MAX_DEPTH)
 			goto out_reset;
 
-		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-				      (xfrm_address_t *)&ip_hdr(skb)->daddr,
-				      spi, IPPROTO_ESP, AF_INET);
+		x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+					    (xfrm_address_t *)&ip_hdr(skb)->daddr,
+					    spi, IPPROTO_ESP, AF_INET);
 		if (!x)
 			goto out_reset;
 
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 527b7caddbc6..c82ed369e888 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@  static struct sk_buff *esp6_gro_receive(struct list_head *head,
 		if (sp->len == XFRM_MAX_DEPTH)
 			goto out_reset;
 
-		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
-				      (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
-				      spi, IPPROTO_ESP, AF_INET6);
+		x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+					    (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+					    spi, IPPROTO_ESP, AF_INET6);
 		if (!x)
 			goto out_reset;
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 161f535c8b94..82dba4673296 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -563,7 +563,7 @@  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 			goto drop;
 		}
 
-		x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+		x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
 		if (x == NULL) {
 			secpath_reset(skb);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ff2b0fc0b206..86f8dde23ff1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -717,6 +717,9 @@  int __xfrm_state_delete(struct xfrm_state *x)
 			hlist_del_rcu(&x->byseq);
 		if (!hlist_unhashed(&x->state_cache))
 			hlist_del_rcu(&x->state_cache);
+		if (!hlist_unhashed(&x->state_cache_input))
+			hlist_del_rcu(&x->state_cache_input);
+
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1048,6 +1051,52 @@  static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 	return NULL;
 }
 
+/* Find the state matching mark/daddr/spi/proto/family: scan this CPU's
+ * inbound cache list first, then fall back to __xfrm_state_lookup().
+ */
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+					   const xfrm_address_t *daddr,
+					   __be32 spi, u8 proto,
+					   unsigned short family)
+{
+	struct hlist_head *state_cache_input;
+	struct xfrm_state *x = NULL;
+	int cpu = get_cpu();
+
+	state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+		if (x->props.family != family ||
+		    x->id.spi       != spi ||
+		    x->id.proto     != proto ||
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
+			continue;
+
+		if ((mark & x->mark.m) != x->mark.v)
+			continue;
+		if (!xfrm_state_hold_rcu(x))
+			continue;
+		goto out;
+	}
+
+	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+	/* Put a valid result at the head of this CPU's cache list. */
+	if (x && x->km.state == XFRM_STATE_VALID) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (!hlist_unhashed(&x->state_cache_input))
+			hlist_del_rcu(&x->state_cache_input);
+		hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
+out:
+	rcu_read_unlock();
+	put_cpu();
+	return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 						     const xfrm_address_t *daddr,
 						     const xfrm_address_t *saddr,
@@ -2987,6 +3036,11 @@  int __net_init xfrm_state_init(struct net *net)
 	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
 	if (!net->xfrm.state_byseq)
 		goto out_byseq;
+
+	net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+	if (!net->xfrm.state_cache_input)
+		goto out_state_cache_input;
+
 	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
 
 	net->xfrm.state_num = 0;
@@ -2996,6 +3050,8 @@  int __net_init xfrm_state_init(struct net *net)
 			       &net->xfrm.xfrm_state_lock);
 	return 0;
 
+out_state_cache_input:
+	xfrm_hash_free(net->xfrm.state_byseq, sz);
 out_byseq:
 	xfrm_hash_free(net->xfrm.state_byspi, sz);
 out_byspi:
@@ -3025,6 +3081,7 @@  void xfrm_state_fini(struct net *net)
 	xfrm_hash_free(net->xfrm.state_bysrc, sz);
 	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
 	xfrm_hash_free(net->xfrm.state_bydst, sz);
+	free_percpu(net->xfrm.state_cache_input);
 }
 
 #ifdef CONFIG_AUDITSYSCALL