diff mbox series

[ipsec-next,v1,6/8] iptfs: xfrm: Add mode_cbs module functionality

Message ID 20240219085735.1220113-7-chopps@chopps.org (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series Add IP-TFS mode to xfrm | expand

Checks

Context Check Description
netdev/series_format warning Series does not have a cover letter
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1177 this patch: 1177
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: kuba@kernel.org pabeni@redhat.com herbert@gondor.apana.org.au edumazet@google.com
netdev/build_clang success Errors and warnings before: 960 this patch: 960
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1185 this patch: 1185
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Christian Hopps Feb. 19, 2024, 8:57 a.m. UTC
From: Christian Hopps <chopps@labn.net>

Add a set of callbacks xfrm_mode_cbs to xfrm_state. These callbacks
enable the addition of new xfrm modes, such as IP-TFS to be defined
in modules.

Signed-off-by: Christian Hopps <chopps@labn.net>
---
 include/net/xfrm.h     | 38 ++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_device.c |  3 ++-
 net/xfrm/xfrm_input.c  | 14 +++++++++++--
 net/xfrm/xfrm_output.c |  2 ++
 net/xfrm/xfrm_policy.c | 18 +++++++++-------
 net/xfrm/xfrm_state.c  | 47 ++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_user.c   | 10 +++++++++
 7 files changed, 122 insertions(+), 10 deletions(-)

Comments

Antony Antony Feb. 25, 2024, 12:17 p.m. UTC | #1
Hi Chris,

I was testing this version.
And I ran into issues when migrating states. IP-TFS values are set to 0.
I noticed 2-3 issues both in this patch and 8/8

On Mon, Feb 19, 2024 at 03:57:33AM -0500, Christian Hopps via Devel wrote:
> From: Christian Hopps <chopps@labn.net>
> 
> Add a set of callbacks xfrm_mode_cbs to xfrm_state. These callbacks
> enable the addition of new xfrm modes, such as IP-TFS to be defined
> in modules.
> 
> Signed-off-by: Christian Hopps <chopps@labn.net>
> ---
>  include/net/xfrm.h     | 38 ++++++++++++++++++++++++++++++++++
>  net/xfrm/xfrm_device.c |  3 ++-
>  net/xfrm/xfrm_input.c  | 14 +++++++++++--
>  net/xfrm/xfrm_output.c |  2 ++
>  net/xfrm/xfrm_policy.c | 18 +++++++++-------
>  net/xfrm/xfrm_state.c  | 47 ++++++++++++++++++++++++++++++++++++++++++
>  net/xfrm/xfrm_user.c   | 10 +++++++++
>  7 files changed, 122 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index 1d107241b901..f1d5e99f0a47 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -204,6 +204,7 @@ struct xfrm_state {
>  		u16		family;
>  		xfrm_address_t	saddr;
>  		int		header_len;
> +		int		enc_hdr_len;
>  		int		trailer_len;
>  		u32		extra_flags;
>  		struct xfrm_mark	smark;
> @@ -289,6 +290,9 @@ struct xfrm_state {
>  	/* Private data of this transformer, format is opaque,
>  	 * interpreted by xfrm_type methods. */
>  	void			*data;
> +
> +	const struct xfrm_mode_cbs	*mode_cbs;
> +	void				*mode_data;
>  };
>  
>  static inline struct net *xs_net(struct xfrm_state *x)
> @@ -441,6 +445,40 @@ struct xfrm_type_offload {
>  int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
>  void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
>  
> +struct xfrm_mode_cbs {
> +	struct module	*owner;
> +	/* Add/delete state in the new xfrm_state in `x`. */
> +	int	(*create_state)(struct xfrm_state *x);
> +	void	(*delete_state)(struct xfrm_state *x);
> +
> +	/* Called while handling the user netlink options. */
> +	int	(*user_init)(struct net *net, struct xfrm_state *x,
> +			     struct nlattr **attrs,
> +			     struct netlink_ext_ack *extack);
> +	int	(*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
> +	int     (*clone)(struct xfrm_state *x, struct xfrm_state *orig);
> +
> +	u32	(*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
> +
> +	/* Called to handle received xfrm (egress) packets. */
> +	int	(*input)(struct xfrm_state *x, struct sk_buff *skb);
> +
> +	/* Placed in dst_output of the dst when an xfrm_state is bound. */
> +	int	(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
> +
> +	/**
> +	 * Prepare the skb for output for the given mode. Returns:
> +	 *    Error value, if 0 then skb values should be as follows:
> +	 *    transport_header should point at ESP header
> +	 *    network_header should point at Outer IP header
> +	 *    mac_header should point at protocol/nexthdr of the outer IP
> +	 */
> +	int	(*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
> +};
> +
> +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
> +void xfrm_unregister_mode_cbs(u8 mode);
> +
>  static inline int xfrm_af2proto(unsigned int family)
>  {
>  	switch(family) {
> diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
> index 3784534c9185..8b848540ea47 100644
> --- a/net/xfrm/xfrm_device.c
> +++ b/net/xfrm/xfrm_device.c
> @@ -42,7 +42,8 @@ static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
>  		skb->transport_header = skb->network_header + hsize;
>  
>  	skb_reset_mac_len(skb);
> -	pskb_pull(skb, skb->mac_len + x->props.header_len);
> +	pskb_pull(skb,
> +		  skb->mac_len + x->props.header_len - x->props.enc_hdr_len);
>  }
>  
>  static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
> index bd4ce21d76d7..824f7b7f90e0 100644
> --- a/net/xfrm/xfrm_input.c
> +++ b/net/xfrm/xfrm_input.c
> @@ -437,6 +437,9 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
>  		WARN_ON_ONCE(1);
>  		break;
>  	default:
> +		if (x->mode_cbs && x->mode_cbs->input)
> +			return x->mode_cbs->input(x, skb);
> +
>  		WARN_ON_ONCE(1);
>  		break;
>  	}
> @@ -479,6 +482,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>  
>  		family = x->props.family;
>  
> +		/* An encap_type of -3 indicates reconstructed inner packet */
> +		if (encap_type == -3)
> +			goto resume_decapped;
> +
>  		/* An encap_type of -1 indicates async resumption. */
>  		if (encap_type == -1) {
>  			async = 1;
> @@ -660,11 +667,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>  
>  		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
>  
> -		if (xfrm_inner_mode_input(x, skb)) {
> +		err = xfrm_inner_mode_input(x, skb);
> +		if (err == -EINPROGRESS)
> +			return 0;
> +		else if (err) {
>  			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
>  			goto drop;
>  		}
> -
> +resume_decapped:
>  		if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
>  			decaps = 1;
>  			break;
> diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
> index 662c83beb345..8f98e42d4252 100644
> --- a/net/xfrm/xfrm_output.c
> +++ b/net/xfrm/xfrm_output.c
> @@ -472,6 +472,8 @@ static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
>  		WARN_ON_ONCE(1);
>  		break;
>  	default:
> +		if (x->mode_cbs && x->mode_cbs->prepare_output)
> +			return x->mode_cbs->prepare_output(x, skb);
>  		WARN_ON_ONCE(1);
>  		break;
>  	}
> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
> index 53b7ce4a4db0..f3cd8483d427 100644
> --- a/net/xfrm/xfrm_policy.c
> +++ b/net/xfrm/xfrm_policy.c
> @@ -2713,13 +2713,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
>  
>  		dst1->input = dst_discard;
>  
> -		rcu_read_lock();
> -		afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
> -		if (likely(afinfo))
> -			dst1->output = afinfo->output;
> -		else
> -			dst1->output = dst_discard_out;
> -		rcu_read_unlock();
> +		if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) {
> +			dst1->output = xfrm[i]->mode_cbs->output;
> +		} else {
> +			rcu_read_lock();
> +			afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
> +			if (likely(afinfo))
> +				dst1->output = afinfo->output;
> +			else
> +				dst1->output = dst_discard_out;
> +			rcu_read_unlock();
> +		}
>  
>  		xdst_prev = xdst;
>  
> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
> index bda5327bf34d..2b58e35bea63 100644
> --- a/net/xfrm/xfrm_state.c
> +++ b/net/xfrm/xfrm_state.c
> @@ -513,6 +513,36 @@ static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
>  	return NULL;
>  }
>  
> +static struct xfrm_mode_cbs xfrm_mode_cbs_map[XFRM_MODE_MAX];
> +
> +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs)
> +{
> +	if (mode >= XFRM_MODE_MAX)
> +		return -EINVAL;
> +
> +	xfrm_mode_cbs_map[mode] = *mode_cbs;
> +	return 0;
> +}
> +EXPORT_SYMBOL(xfrm_register_mode_cbs);
> +
> +void xfrm_unregister_mode_cbs(u8 mode)
> +{
> +	if (mode >= XFRM_MODE_MAX)
> +		return;
> +
> +	memset(&xfrm_mode_cbs_map[mode], 0, sizeof(xfrm_mode_cbs_map[mode]));
> +}
> +EXPORT_SYMBOL(xfrm_unregister_mode_cbs);
> +
> +static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode)
> +{
> +	if (mode >= XFRM_MODE_MAX)
> +		return NULL;
> +	if (mode == XFRM_MODE_IPTFS && !xfrm_mode_cbs_map[mode].create_state)
> +		request_module("xfrm-iptfs");
> +	return &xfrm_mode_cbs_map[mode];
> +}
> +
>  void xfrm_state_free(struct xfrm_state *x)
>  {
>  	kmem_cache_free(xfrm_state_cache, x);
> @@ -521,6 +551,8 @@ EXPORT_SYMBOL(xfrm_state_free);
>  
>  static void ___xfrm_state_destroy(struct xfrm_state *x)
>  {
> +	if (x->mode_cbs && x->mode_cbs->delete_state)
> +		x->mode_cbs->delete_state(x);
>  	hrtimer_cancel(&x->mtimer);
>  	del_timer_sync(&x->rtimer);
>  	kfree(x->aead);
> @@ -678,6 +710,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
>  		x->replay_maxage = 0;
>  		x->replay_maxdiff = 0;
>  		spin_lock_init(&x->lock);
> +		x->mode_data = NULL;
>  	}
>  	return x;
>  }
> @@ -1745,6 +1778,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
>  	x->new_mapping = 0;
>  	x->new_mapping_sport = 0;
>  
> +	if (x->mode_cbs && x->mode_cbs->clone) {

I notice x->mode_cbs,  it should check old state?

+ if (orig->mode_cbs && orig->mode_cbs->clone) {

> +		if (!x->mode_cbs->clone(x, orig))

iptfs_clone()  return 0 on success. So
"!". x->mode_cbs->clone -> iptfs_clone()
also use orig?
+		if (orig->mode_cbs->clone(x, orig))


> +			goto error;
> +	}
> +
>  	return x;
>  
>   error:
> @@ -2765,6 +2803,9 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
>  	case XFRM_MODE_TUNNEL:
>  		break;
>  	default:
> +		if (x->mode_cbs && x->mode_cbs->get_inner_mtu)
> +			return x->mode_cbs->get_inner_mtu(x, mtu);
> +
>  		WARN_ON_ONCE(1);
>  		break;
>  	}
> @@ -2850,6 +2891,12 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
>  			goto error;
>  	}
>  
> +	x->mode_cbs = xfrm_get_mode_cbs(x->props.mode);
> +	if (x->mode_cbs && x->mode_cbs->create_state) {
> +		err = x->mode_cbs->create_state(x);
> +		if (err)
> +			goto error;
> +	}
>  error:
>  	return err;
>  }
> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
> index d4c88d29703e..92d11f2306e7 100644
> --- a/net/xfrm/xfrm_user.c
> +++ b/net/xfrm/xfrm_user.c
> @@ -779,6 +779,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
>  			goto error;
>  	}
>  
> +	if (x->mode_cbs && x->mode_cbs->user_init) {
> +		err = x->mode_cbs->user_init(net, x, attrs, extack);
> +		if (err)
> +			goto error;
> +	}
> +
>  	return x;
>  
>  error:
> @@ -1192,6 +1198,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
>  		if (ret)
>  			goto out;
>  	}
> +	if (x->mode_cbs && x->mode_cbs->copy_to_user)
> +		ret = x->mode_cbs->copy_to_user(x, skb);
> +	if (ret)
> +		goto out;
>  	if (x->mapping_maxage)
>  		ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
>  out:
> -- 
> 2.43.0
> 
> -- 
> Devel mailing list
> Devel@linux-ipsec.org
> https://linux-ipsec.org/mailman/listinfo/devel
Christian Hopps March 8, 2024, 10:21 p.m. UTC | #2
Antony Antony <antony@phenome.org> writes:

> Hi Chris,
>
> I was testing this version.
> And I ran into issues when migrating states. IP-TFS values are set to 0.
> I noticed 2-3 issues both in this patch and 8/8
>
> On Mon, Feb 19, 2024 at 03:57:33AM -0500, Christian Hopps via Devel wrote:
>> From: Christian Hopps <chopps@labn.net>
>>
>> Add a set of callbacks xfrm_mode_cbs to xfrm_state. These callbacks
>> enable the addition of new xfrm modes, such as IP-TFS to be defined
>> in modules.
>>
>> Signed-off-by: Christian Hopps <chopps@labn.net>
>> ---
>>  include/net/xfrm.h     | 38 ++++++++++++++++++++++++++++++++++
>>  net/xfrm/xfrm_device.c |  3 ++-
>>  net/xfrm/xfrm_input.c  | 14 +++++++++++--
>>  net/xfrm/xfrm_output.c |  2 ++
>>  net/xfrm/xfrm_policy.c | 18 +++++++++-------
>>  net/xfrm/xfrm_state.c  | 47 ++++++++++++++++++++++++++++++++++++++++++
>>  net/xfrm/xfrm_user.c   | 10 +++++++++
>>  7 files changed, 122 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
>> index 1d107241b901..f1d5e99f0a47 100644
>> --- a/include/net/xfrm.h
>> +++ b/include/net/xfrm.h
>> @@ -204,6 +204,7 @@ struct xfrm_state {
>>  		u16		family;
>>  		xfrm_address_t	saddr;
>>  		int		header_len;
>> +		int		enc_hdr_len;
>>  		int		trailer_len;
>>  		u32		extra_flags;
>>  		struct xfrm_mark	smark;
>> @@ -289,6 +290,9 @@ struct xfrm_state {
>>  	/* Private data of this transformer, format is opaque,
>>  	 * interpreted by xfrm_type methods. */
>>  	void			*data;
>> +
>> +	const struct xfrm_mode_cbs	*mode_cbs;
>> +	void				*mode_data;
>>  };
>>
>>  static inline struct net *xs_net(struct xfrm_state *x)
>> @@ -441,6 +445,40 @@ struct xfrm_type_offload {
>>  int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
>>  void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
>>
>> +struct xfrm_mode_cbs {
>> +	struct module	*owner;
>> +	/* Add/delete state in the new xfrm_state in `x`. */
>> +	int	(*create_state)(struct xfrm_state *x);
>> +	void	(*delete_state)(struct xfrm_state *x);
>> +
>> +	/* Called while handling the user netlink options. */
>> +	int	(*user_init)(struct net *net, struct xfrm_state *x,
>> +			     struct nlattr **attrs,
>> +			     struct netlink_ext_ack *extack);
>> +	int	(*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
>> +	int     (*clone)(struct xfrm_state *x, struct xfrm_state *orig);
>> +
>> +	u32	(*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
>> +
>> +	/* Called to handle received xfrm (egress) packets. */
>> +	int	(*input)(struct xfrm_state *x, struct sk_buff *skb);
>> +
>> +	/* Placed in dst_output of the dst when an xfrm_state is bound. */
>> +	int	(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
>> +
>> +	/**
>> +	 * Prepare the skb for output for the given mode. Returns:
>> +	 *    Error value, if 0 then skb values should be as follows:
>> +	 *    transport_header should point at ESP header
>> +	 *    network_header should point at Outer IP header
>> +	 *    mac_header should point at protocol/nexthdr of the outer IP
>> +	 */
>> +	int	(*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
>> +};
>> +
>> +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
>> +void xfrm_unregister_mode_cbs(u8 mode);
>> +
>>  static inline int xfrm_af2proto(unsigned int family)
>>  {
>>  	switch(family) {
>> diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
>> index 3784534c9185..8b848540ea47 100644
>> --- a/net/xfrm/xfrm_device.c
>> +++ b/net/xfrm/xfrm_device.c
>> @@ -42,7 +42,8 @@ static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
>>  		skb->transport_header = skb->network_header + hsize;
>>
>>  	skb_reset_mac_len(skb);
>> -	pskb_pull(skb, skb->mac_len + x->props.header_len);
>> +	pskb_pull(skb,
>> +		  skb->mac_len + x->props.header_len - x->props.enc_hdr_len);
>>  }
>>
>>  static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
>> diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
>> index bd4ce21d76d7..824f7b7f90e0 100644
>> --- a/net/xfrm/xfrm_input.c
>> +++ b/net/xfrm/xfrm_input.c
>> @@ -437,6 +437,9 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
>>  		WARN_ON_ONCE(1);
>>  		break;
>>  	default:
>> +		if (x->mode_cbs && x->mode_cbs->input)
>> +			return x->mode_cbs->input(x, skb);
>> +
>>  		WARN_ON_ONCE(1);
>>  		break;
>>  	}
>> @@ -479,6 +482,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>>
>>  		family = x->props.family;
>>
>> +		/* An encap_type of -3 indicates reconstructed inner packet */
>> +		if (encap_type == -3)
>> +			goto resume_decapped;
>> +
>>  		/* An encap_type of -1 indicates async resumption. */
>>  		if (encap_type == -1) {
>>  			async = 1;
>> @@ -660,11 +667,14 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
>>
>>  		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
>>
>> -		if (xfrm_inner_mode_input(x, skb)) {
>> +		err = xfrm_inner_mode_input(x, skb);
>> +		if (err == -EINPROGRESS)
>> +			return 0;
>> +		else if (err) {
>>  			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
>>  			goto drop;
>>  		}
>> -
>> +resume_decapped:
>>  		if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
>>  			decaps = 1;
>>  			break;
>> diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
>> index 662c83beb345..8f98e42d4252 100644
>> --- a/net/xfrm/xfrm_output.c
>> +++ b/net/xfrm/xfrm_output.c
>> @@ -472,6 +472,8 @@ static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
>>  		WARN_ON_ONCE(1);
>>  		break;
>>  	default:
>> +		if (x->mode_cbs && x->mode_cbs->prepare_output)
>> +			return x->mode_cbs->prepare_output(x, skb);
>>  		WARN_ON_ONCE(1);
>>  		break;
>>  	}
>> diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
>> index 53b7ce4a4db0..f3cd8483d427 100644
>> --- a/net/xfrm/xfrm_policy.c
>> +++ b/net/xfrm/xfrm_policy.c
>> @@ -2713,13 +2713,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
>>
>>  		dst1->input = dst_discard;
>>
>> -		rcu_read_lock();
>> -		afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
>> -		if (likely(afinfo))
>> -			dst1->output = afinfo->output;
>> -		else
>> -			dst1->output = dst_discard_out;
>> -		rcu_read_unlock();
>> +		if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) {
>> +			dst1->output = xfrm[i]->mode_cbs->output;
>> +		} else {
>> +			rcu_read_lock();
>> +			afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
>> +			if (likely(afinfo))
>> +				dst1->output = afinfo->output;
>> +			else
>> +				dst1->output = dst_discard_out;
>> +			rcu_read_unlock();
>> +		}
>>
>>  		xdst_prev = xdst;
>>
>> diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
>> index bda5327bf34d..2b58e35bea63 100644
>> --- a/net/xfrm/xfrm_state.c
>> +++ b/net/xfrm/xfrm_state.c
>> @@ -513,6 +513,36 @@ static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
>>  	return NULL;
>>  }
>>
>> +static struct xfrm_mode_cbs xfrm_mode_cbs_map[XFRM_MODE_MAX];
>> +
>> +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs)
>> +{
>> +	if (mode >= XFRM_MODE_MAX)
>> +		return -EINVAL;
>> +
>> +	xfrm_mode_cbs_map[mode] = *mode_cbs;
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL(xfrm_register_mode_cbs);
>> +
>> +void xfrm_unregister_mode_cbs(u8 mode)
>> +{
>> +	if (mode >= XFRM_MODE_MAX)
>> +		return;
>> +
>> +	memset(&xfrm_mode_cbs_map[mode], 0, sizeof(xfrm_mode_cbs_map[mode]));
>> +}
>> +EXPORT_SYMBOL(xfrm_unregister_mode_cbs);
>> +
>> +static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode)
>> +{
>> +	if (mode >= XFRM_MODE_MAX)
>> +		return NULL;
>> +	if (mode == XFRM_MODE_IPTFS && !xfrm_mode_cbs_map[mode].create_state)
>> +		request_module("xfrm-iptfs");
>> +	return &xfrm_mode_cbs_map[mode];
>> +}
>> +
>>  void xfrm_state_free(struct xfrm_state *x)
>>  {
>>  	kmem_cache_free(xfrm_state_cache, x);
>> @@ -521,6 +551,8 @@ EXPORT_SYMBOL(xfrm_state_free);
>>
>>  static void ___xfrm_state_destroy(struct xfrm_state *x)
>>  {
>> +	if (x->mode_cbs && x->mode_cbs->delete_state)
>> +		x->mode_cbs->delete_state(x);
>>  	hrtimer_cancel(&x->mtimer);
>>  	del_timer_sync(&x->rtimer);
>>  	kfree(x->aead);
>> @@ -678,6 +710,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
>>  		x->replay_maxage = 0;
>>  		x->replay_maxdiff = 0;
>>  		spin_lock_init(&x->lock);
>> +		x->mode_data = NULL;
>>  	}
>>  	return x;
>>  }
>> @@ -1745,6 +1778,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
>>  	x->new_mapping = 0;
>>  	x->new_mapping_sport = 0;
>>
>> +	if (x->mode_cbs && x->mode_cbs->clone) {
>
> I notice x->mode_cbs,  it should check old state?
>
> + if (orig->mode_cbs && orig->mode_cbs->clone) {

Actually we need:

+	x->mode_cbs = orig->mode_cbs;
	if (x->mode_cbs && x->mode_cbs->clone) {

>> +		if (!x->mode_cbs->clone(x, orig))
>
> iptfs_clone()  return 0 on success. So
> "!". x->mode_cbs->clone -> iptfs_clone()
> also use orig?
> +		if (orig->mode_cbs->clone(x, orig))

Thanks for catching these!
Chris.
diff mbox series

Patch

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1d107241b901..f1d5e99f0a47 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -204,6 +204,7 @@  struct xfrm_state {
 		u16		family;
 		xfrm_address_t	saddr;
 		int		header_len;
+		int		enc_hdr_len;
 		int		trailer_len;
 		u32		extra_flags;
 		struct xfrm_mark	smark;
@@ -289,6 +290,9 @@  struct xfrm_state {
 	/* Private data of this transformer, format is opaque,
 	 * interpreted by xfrm_type methods. */
 	void			*data;
+
+	const struct xfrm_mode_cbs	*mode_cbs;
+	void				*mode_data;
 };
 
 static inline struct net *xs_net(struct xfrm_state *x)
@@ -441,6 +445,40 @@  struct xfrm_type_offload {
 int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
 void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
 
+struct xfrm_mode_cbs {
+	struct module	*owner;
+	/* Add/delete state in the new xfrm_state in `x`. */
+	int	(*create_state)(struct xfrm_state *x);
+	void	(*delete_state)(struct xfrm_state *x);
+
+	/* Called while handling the user netlink options. */
+	int	(*user_init)(struct net *net, struct xfrm_state *x,
+			     struct nlattr **attrs,
+			     struct netlink_ext_ack *extack);
+	int	(*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
+	int     (*clone)(struct xfrm_state *x, struct xfrm_state *orig);
+
+	u32	(*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
+
+	/* Called to handle received xfrm (egress) packets. */
+	int	(*input)(struct xfrm_state *x, struct sk_buff *skb);
+
+	/* Placed in dst_output of the dst when an xfrm_state is bound. */
+	int	(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
+
+	/**
+	 * Prepare the skb for output for the given mode. Returns:
+	 *    Error value, if 0 then skb values should be as follows:
+	 *    transport_header should point at ESP header
+	 *    network_header should point at Outer IP header
+	 *    mac_header should point at protocol/nexthdr of the outer IP
+	 */
+	int	(*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
+};
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
+void xfrm_unregister_mode_cbs(u8 mode);
+
 static inline int xfrm_af2proto(unsigned int family)
 {
 	switch(family) {
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3784534c9185..8b848540ea47 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -42,7 +42,8 @@  static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
 		skb->transport_header = skb->network_header + hsize;
 
 	skb_reset_mac_len(skb);
-	pskb_pull(skb, skb->mac_len + x->props.header_len);
+	pskb_pull(skb,
+		  skb->mac_len + x->props.header_len - x->props.enc_hdr_len);
 }
 
 static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index bd4ce21d76d7..824f7b7f90e0 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -437,6 +437,9 @@  static int xfrm_inner_mode_input(struct xfrm_state *x,
 		WARN_ON_ONCE(1);
 		break;
 	default:
+		if (x->mode_cbs && x->mode_cbs->input)
+			return x->mode_cbs->input(x, skb);
+
 		WARN_ON_ONCE(1);
 		break;
 	}
@@ -479,6 +482,10 @@  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		family = x->props.family;
 
+		/* An encap_type of -3 indicates reconstructed inner packet */
+		if (encap_type == -3)
+			goto resume_decapped;
+
 		/* An encap_type of -1 indicates async resumption. */
 		if (encap_type == -1) {
 			async = 1;
@@ -660,11 +667,14 @@  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
 		XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
 
-		if (xfrm_inner_mode_input(x, skb)) {
+		err = xfrm_inner_mode_input(x, skb);
+		if (err == -EINPROGRESS)
+			return 0;
+		else if (err) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
 			goto drop;
 		}
-
+resume_decapped:
 		if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
 			decaps = 1;
 			break;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 662c83beb345..8f98e42d4252 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -472,6 +472,8 @@  static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
 		WARN_ON_ONCE(1);
 		break;
 	default:
+		if (x->mode_cbs && x->mode_cbs->prepare_output)
+			return x->mode_cbs->prepare_output(x, skb);
 		WARN_ON_ONCE(1);
 		break;
 	}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 53b7ce4a4db0..f3cd8483d427 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2713,13 +2713,17 @@  static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 
 		dst1->input = dst_discard;
 
-		rcu_read_lock();
-		afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
-		if (likely(afinfo))
-			dst1->output = afinfo->output;
-		else
-			dst1->output = dst_discard_out;
-		rcu_read_unlock();
+		if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) {
+			dst1->output = xfrm[i]->mode_cbs->output;
+		} else {
+			rcu_read_lock();
+			afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+			if (likely(afinfo))
+				dst1->output = afinfo->output;
+			else
+				dst1->output = dst_discard_out;
+			rcu_read_unlock();
+		}
 
 		xdst_prev = xdst;
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index bda5327bf34d..2b58e35bea63 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -513,6 +513,36 @@  static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 	return NULL;
 }
 
+static struct xfrm_mode_cbs xfrm_mode_cbs_map[XFRM_MODE_MAX];
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs)
+{
+	if (mode >= XFRM_MODE_MAX)
+		return -EINVAL;
+
+	xfrm_mode_cbs_map[mode] = *mode_cbs;
+	return 0;
+}
+EXPORT_SYMBOL(xfrm_register_mode_cbs);
+
+void xfrm_unregister_mode_cbs(u8 mode)
+{
+	if (mode >= XFRM_MODE_MAX)
+		return;
+
+	memset(&xfrm_mode_cbs_map[mode], 0, sizeof(xfrm_mode_cbs_map[mode]));
+}
+EXPORT_SYMBOL(xfrm_unregister_mode_cbs);
+
+static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode)
+{
+	if (mode >= XFRM_MODE_MAX)
+		return NULL;
+	if (mode == XFRM_MODE_IPTFS && !xfrm_mode_cbs_map[mode].create_state)
+		request_module("xfrm-iptfs");
+	return &xfrm_mode_cbs_map[mode];
+}
+
 void xfrm_state_free(struct xfrm_state *x)
 {
 	kmem_cache_free(xfrm_state_cache, x);
@@ -521,6 +551,8 @@  EXPORT_SYMBOL(xfrm_state_free);
 
 static void ___xfrm_state_destroy(struct xfrm_state *x)
 {
+	if (x->mode_cbs && x->mode_cbs->delete_state)
+		x->mode_cbs->delete_state(x);
 	hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
 	kfree(x->aead);
@@ -678,6 +710,7 @@  struct xfrm_state *xfrm_state_alloc(struct net *net)
 		x->replay_maxage = 0;
 		x->replay_maxdiff = 0;
 		spin_lock_init(&x->lock);
+		x->mode_data = NULL;
 	}
 	return x;
 }
@@ -1745,6 +1778,11 @@  static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	x->new_mapping = 0;
 	x->new_mapping_sport = 0;
 
+	if (x->mode_cbs && x->mode_cbs->clone) {
+		if (!x->mode_cbs->clone(x, orig))
+			goto error;
+	}
+
 	return x;
 
  error:
@@ -2765,6 +2803,9 @@  u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
 	case XFRM_MODE_TUNNEL:
 		break;
 	default:
+		if (x->mode_cbs && x->mode_cbs->get_inner_mtu)
+			return x->mode_cbs->get_inner_mtu(x, mtu);
+
 		WARN_ON_ONCE(1);
 		break;
 	}
@@ -2850,6 +2891,12 @@  int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
 			goto error;
 	}
 
+	x->mode_cbs = xfrm_get_mode_cbs(x->props.mode);
+	if (x->mode_cbs && x->mode_cbs->create_state) {
+		err = x->mode_cbs->create_state(x);
+		if (err)
+			goto error;
+	}
 error:
 	return err;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d4c88d29703e..92d11f2306e7 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -779,6 +779,12 @@  static struct xfrm_state *xfrm_state_construct(struct net *net,
 			goto error;
 	}
 
+	if (x->mode_cbs && x->mode_cbs->user_init) {
+		err = x->mode_cbs->user_init(net, x, attrs, extack);
+		if (err)
+			goto error;
+	}
+
 	return x;
 
 error:
@@ -1192,6 +1198,10 @@  static int copy_to_user_state_extra(struct xfrm_state *x,
 		if (ret)
 			goto out;
 	}
+	if (x->mode_cbs && x->mode_cbs->copy_to_user)
+		ret = x->mode_cbs->copy_to_user(x, skb);
+	if (ret)
+		goto out;
 	if (x->mapping_maxage)
 		ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
 out: