diff mbox series

[ipsec-next,2/3] xfrm: Cache used outbound xfrm states at the policy.

Message ID 20240412060553.3483630-3-steffen.klassert@secunet.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Add support for per cpu xfrm states. | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1163 this patch: 1163
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: pabeni@redhat.com edumazet@google.com kuba@kernel.org herbert@gondor.apana.org.au
netdev/build_clang fail Errors and warnings before: 17 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 263 this patch: 303
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Steffen Klassert April 12, 2024, 6:05 a.m. UTC
Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  3 +++
 net/xfrm/xfrm_policy.c | 12 ++++++++++
 net/xfrm/xfrm_state.c  | 54 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

Comments

Daniel Xu April 16, 2024, 9:51 p.m. UTC | #1
Hi Steffen,

On Fri, Apr 12, 2024 at 08:05:52AM +0200, Steffen Klassert wrote:
> Now that we can have percpu xfrm states, the number of active
> states might increase. To get a better lookup performance,
> we cache the used xfrm states at the policy for outbound
> IPsec traffic.
> 
> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
> ---
>  include/net/xfrm.h     |  3 +++
>  net/xfrm/xfrm_policy.c | 12 ++++++++++
>  net/xfrm/xfrm_state.c  | 54 ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 69 insertions(+)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 2ba4c077ccf9..49c85bcd9fd9 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -181,6 +181,7 @@ struct xfrm_state {
>  	struct hlist_node	bysrc;
>  	struct hlist_node	byspi;
>  	struct hlist_node	byseq;
> +	struct hlist_node	state_cache;
>  
>  	refcount_t		refcnt;
>  	spinlock_t		lock;
> @@ -524,6 +525,8 @@ struct xfrm_policy {
>  	struct hlist_node	bydst;
>  	struct hlist_node	byidx;
>  
> +	struct hlist_head	state_cache_list;
> +
>  	/* This lock only affects elements except for entry. */
>  	rwlock_t		lock;
>  	refcount_t		refcnt;
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index 6affe5cd85d8..6a7f1d40d5f6 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -410,6 +410,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
>  	if (policy) {
>  		write_pnet(&policy->xp_net, net);
>  		INIT_LIST_HEAD(&policy->walk.all);
> +		INIT_HLIST_HEAD(&policy->state_cache_list);
>  		INIT_HLIST_NODE(&policy->bydst_inexact_list);
>  		INIT_HLIST_NODE(&policy->bydst);
>  		INIT_HLIST_NODE(&policy->byidx);
> @@ -452,6 +453,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
>  
>  static void xfrm_policy_kill(struct xfrm_policy *policy)
>  {
> +	struct net *net = xp_net(policy);
> +	struct xfrm_state *x;
> +
>  	write_lock_bh(&policy->lock);
>  	policy->walk.dead = 1;
>  	write_unlock_bh(&policy->lock);
> @@ -465,6 +469,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
>  	if (del_timer(&policy->timer))
>  		xfrm_pol_put(policy);
>  
> +	/* XXX: Flush state cache */
> +	spin_lock_bh(&net->xfrm.xfrm_state_lock);
> +	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
> +		hlist_del_init_rcu(&x->state_cache);
> +	}
> +	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
> +
>  	xfrm_pol_put(policy);
>  }
>  
> @@ -3253,6 +3264,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
>  		dst_release(dst);
>  		dst = dst_orig;
>  	}
> +
>  ok:
>  	xfrm_pols_put(pols, drop_pols);
>  	if (dst && dst->xfrm &&
> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> index b41b5dd72d8e..ff2b0fc0b206 100644
> --- a/net/xfrm/xfrm_state.c
> +++ b/net/xfrm/xfrm_state.c
> @@ -663,6 +663,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
>  		refcount_set(&x->refcnt, 1);
>  		atomic_set(&x->tunnel_users, 0);
>  		INIT_LIST_HEAD(&x->km.all);
> +		INIT_HLIST_NODE(&x->state_cache);
>  		INIT_HLIST_NODE(&x->bydst);
>  		INIT_HLIST_NODE(&x->bysrc);
>  		INIT_HLIST_NODE(&x->byspi);
> @@ -707,12 +708,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
>  
>  	if (x->km.state != XFRM_STATE_DEAD) {
>  		x->km.state = XFRM_STATE_DEAD;
> +
>  		spin_lock(&net->xfrm.xfrm_state_lock);
>  		list_del(&x->km.all);
>  		hlist_del_rcu(&x->bydst);
>  		hlist_del_rcu(&x->bysrc);
>  		if (x->km.seq)
>  			hlist_del_rcu(&x->byseq);
> +		if (!hlist_unhashed(&x->state_cache))
> +			hlist_del_rcu(&x->state_cache);
>  		if (x->id.spi)
>  			hlist_del_rcu(&x->byspi);
>  		net->xfrm.state_num--;
> @@ -1160,6 +1164,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	unsigned short encap_family = tmpl->encap_family;
>  	unsigned int sequence;
>  	struct km_event c;
> +	bool cached = false;
>  	unsigned int pcpu_id = get_cpu();
>  	put_cpu();
>  
> @@ -1168,6 +1173,45 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
>  
>  	rcu_read_lock();
> +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> +		if (x->props.family == encap_family &&
> +		    x->props.reqid == tmpl->reqid &&
> +		    (mark & x->mark.m) == x->mark.v &&
> +		    x->if_id == if_id &&
> +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> +		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
> +		    tmpl->mode == x->props.mode &&
> +		    tmpl->id.proto == x->id.proto &&
> +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> +			xfrm_state_look_at(pol, x, fl, encap_family,
> +					   &best, &acquire_in_progress, &error);
> +	}
> +
> +	if (best)
> +		goto cached;
> +
> +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> +		if (x->props.family == encap_family &&
> +		    x->props.reqid == tmpl->reqid &&
> +		    (mark & x->mark.m) == x->mark.v &&
> +		    x->if_id == if_id &&
> +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> +		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
> +		    tmpl->mode == x->props.mode &&
> +		    tmpl->id.proto == x->id.proto &&
> +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> +			xfrm_state_look_at(pol, x, fl, family,
> +					   &best, &acquire_in_progress, &error);
> +	}
> +
> +cached:
> +	if (best)

Need to set `cached = true` here otherwise slowpath will always be
taken.

[...]

Thanks,
Daniel
Steffen Klassert April 18, 2024, 10:01 a.m. UTC | #2
On Tue, Apr 16, 2024 at 03:51:55PM -0600, Daniel Xu wrote:
> > +
> > +	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
> > +		if (x->props.family == encap_family &&
> > +		    x->props.reqid == tmpl->reqid &&
> > +		    (mark & x->mark.m) == x->mark.v &&
> > +		    x->if_id == if_id &&
> > +		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
> > +		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
> > +		    tmpl->mode == x->props.mode &&
> > +		    tmpl->id.proto == x->id.proto &&
> > +		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
> > +			xfrm_state_look_at(pol, x, fl, family,
> > +					   &best, &acquire_in_progress, &error);
> > +	}
> > +
> > +cached:
> > +	if (best)
> 
> Need to set `cached = true` here otherwise slowpath will always be
> taken.

Fixed, thanks Daniel!
diff mbox series

Patch

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2ba4c077ccf9..49c85bcd9fd9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -181,6 +181,7 @@  struct xfrm_state {
 	struct hlist_node	bysrc;
 	struct hlist_node	byspi;
 	struct hlist_node	byseq;
+	struct hlist_node	state_cache;
 
 	refcount_t		refcnt;
 	spinlock_t		lock;
@@ -524,6 +525,8 @@  struct xfrm_policy {
 	struct hlist_node	bydst;
 	struct hlist_node	byidx;
 
+	struct hlist_head	state_cache_list;
+
 	/* This lock only affects elements except for entry. */
 	rwlock_t		lock;
 	refcount_t		refcnt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 6affe5cd85d8..6a7f1d40d5f6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -410,6 +410,7 @@  struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 	if (policy) {
 		write_pnet(&policy->xp_net, net);
 		INIT_LIST_HEAD(&policy->walk.all);
+		INIT_HLIST_HEAD(&policy->state_cache_list);
 		INIT_HLIST_NODE(&policy->bydst_inexact_list);
 		INIT_HLIST_NODE(&policy->bydst);
 		INIT_HLIST_NODE(&policy->byidx);
@@ -452,6 +453,9 @@  EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
+	struct net *net = xp_net(policy);
+	struct xfrm_state *x;
+
 	write_lock_bh(&policy->lock);
 	policy->walk.dead = 1;
 	write_unlock_bh(&policy->lock);
@@ -465,6 +469,13 @@  static void xfrm_policy_kill(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		xfrm_pol_put(policy);
 
+	/* XXX: Flush state cache */
+	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+		hlist_del_init_rcu(&x->state_cache);
+	}
+	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
 	xfrm_pol_put(policy);
 }
 
@@ -3253,6 +3264,7 @@  struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
 		dst_release(dst);
 		dst = dst_orig;
 	}
+
 ok:
 	xfrm_pols_put(pols, drop_pols);
 	if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b41b5dd72d8e..ff2b0fc0b206 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -663,6 +663,7 @@  struct xfrm_state *xfrm_state_alloc(struct net *net)
 		refcount_set(&x->refcnt, 1);
 		atomic_set(&x->tunnel_users, 0);
 		INIT_LIST_HEAD(&x->km.all);
+		INIT_HLIST_NODE(&x->state_cache);
 		INIT_HLIST_NODE(&x->bydst);
 		INIT_HLIST_NODE(&x->bysrc);
 		INIT_HLIST_NODE(&x->byspi);
@@ -707,12 +708,15 @@  int __xfrm_state_delete(struct xfrm_state *x)
 
 	if (x->km.state != XFRM_STATE_DEAD) {
 		x->km.state = XFRM_STATE_DEAD;
+
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
 		hlist_del_rcu(&x->bydst);
 		hlist_del_rcu(&x->bysrc);
 		if (x->km.seq)
 			hlist_del_rcu(&x->byseq);
+		if (!hlist_unhashed(&x->state_cache))
+			hlist_del_rcu(&x->state_cache);
 		if (x->id.spi)
 			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
@@ -1160,6 +1164,7 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	unsigned short encap_family = tmpl->encap_family;
 	unsigned int sequence;
 	struct km_event c;
+	bool cached = false;
 	unsigned int pcpu_id = get_cpu();
 	put_cpu();
 
@@ -1168,6 +1173,45 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
 
 	rcu_read_lock();
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, encap_family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+	if (best)
+		goto cached;
+
+	hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+		if (x->props.family == encap_family &&
+		    x->props.reqid == tmpl->reqid &&
+		    (mark & x->mark.m) == x->mark.v &&
+		    x->if_id == if_id &&
+		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
+		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+		    tmpl->mode == x->props.mode &&
+		    tmpl->id.proto == x->id.proto &&
+		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+			xfrm_state_look_at(pol, x, fl, family,
+					   &best, &acquire_in_progress, &error);
+	}
+
+cached:
+	if (best)
+		goto found;
+	else if (error)
+		best = NULL;
+	else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+		WARN_ON(1);
+
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 #ifdef CONFIG_XFRM_OFFLOAD
@@ -1317,6 +1361,7 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			XFRM_STATE_INSERT(bysrc, &x->bysrc,
 					  net->xfrm.state_bysrc + h,
 					  x->xso.type);
+			INIT_HLIST_NODE(&x->state_cache);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
 				XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1363,6 +1408,15 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
 	}
+
+	if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+	    (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+		spin_lock_bh(&net->xfrm.xfrm_state_lock);
+		if (hlist_unhashed(&x->state_cache))
+			hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+
 	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);