diff mbox series

[xfrm-next,v9,4/8] xfrm: add TX datapath support for IPsec packet offload mode

Message ID 5bb21e69cff4e720c4f057238902299a3bd15a04.1669547603.git.leonro@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Extend XFRM core to allow packet offload configuration | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 104 this patch: 104
netdev/cc_maintainers warning 1 maintainers not CCed: pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 104 this patch: 104
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 207 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Leon Romanovsky Nov. 27, 2022, 11:18 a.m. UTC
From: Leon Romanovsky <leonro@nvidia.com>

In IPsec packet offload mode, the device encrypts and encapsulates
packets that are associated with an offloaded policy. After a successful
policy lookup, which indicates whether the packets should be offloaded,
the stack forwards them to the device, which performs the encryption
and encapsulation itself.

Signed-off-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 net/xfrm/xfrm_device.c |  15 +++++-
 net/xfrm/xfrm_output.c |  12 ++++-
 net/xfrm/xfrm_state.c  | 120 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 141 insertions(+), 6 deletions(-)

Comments

Steffen Klassert Dec. 2, 2022, 9:30 a.m. UTC | #1
On Sun, Nov 27, 2022 at 01:18:14PM +0200, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
> 
> In IPsec packet mode, the device is going to encrypt and encapsulate
> packets that are associated with offloaded policy. After successful
> policy lookup to indicate if packets should be offloaded or not,
> the stack forwards packets to the device to do the magic.
> 
> Signed-off-by: Raed Salem <raeds@nvidia.com>
> Signed-off-by: Huy Nguyen <huyn@nvidia.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
>  net/xfrm/xfrm_device.c |  15 +++++-
>  net/xfrm/xfrm_output.c |  12 ++++-
>  net/xfrm/xfrm_state.c  | 120 +++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 141 insertions(+), 6 deletions(-)
...
> @@ -1161,7 +1240,31 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  			x = NULL;
>  			goto out;
>  		}
> -
> +#ifdef CONFIG_XFRM_OFFLOAD
> +		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
> +			struct xfrm_dev_offload *xdo = &pol->xdo;
> +			struct xfrm_dev_offload *xso = &x->xso;
> +
> +			xso->type = XFRM_DEV_OFFLOAD_PACKET;
> +			xso->dir = xdo->dir;
> +			xso->dev = xdo->dev;
> +			xso->real_dev = xdo->real_dev;
> +			netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
> +					     GFP_ATOMIC);
> +			error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
> +			if (error) {
> +				xso->dir = 0;
> +				netdev_put(xso->dev, &xso->dev_tracker);
> +				xso->dev = NULL;
> +				xso->real_dev = NULL;
> +				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
> +				x->km.state = XFRM_STATE_DEAD;
> +				to_put = x;
> +				x = NULL;
> +				goto out;
> +			}
> +		}
> +#endif
>  		if (km_query(x, tmpl, pol) == 0) {
>  			spin_lock_bh(&net->xfrm.xfrm_state_lock);
>  			x->km.state = XFRM_STATE_ACQ;
> @@ -1185,6 +1288,17 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
>  			xfrm_hash_grow_check(net, x->bydst.next != NULL);
>  			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
>  		} else {
> +#ifdef CONFIG_XFRM_OFFLOAD
> +			struct xfrm_dev_offload *xso = &x->xso;
> +
> +			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
> +				xso->dir = 0;
> +				netdev_put(xso->dev, &xso->dev_tracker);
> +				xso->dev = NULL;
> +				xso->real_dev = NULL;
> +				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
> +			}

You do a xdo_dev_state_add call to add an acquire state to HW above.
Maybe we should do a xdo_dev_state_del call here when deleting the
acquire state.

> +#endif
>  			x->km.state = XFRM_STATE_DEAD;
>  			to_put = x;
>  			x = NULL;
> -- 
> 2.38.1
Leon Romanovsky Dec. 2, 2022, 5:59 p.m. UTC | #2
On Fri, Dec 02, 2022 at 10:30:28AM +0100, Steffen Klassert wrote:
> On Sun, Nov 27, 2022 at 01:18:14PM +0200, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> > 
> > In IPsec packet mode, the device is going to encrypt and encapsulate
> > packets that are associated with offloaded policy. After successful
> > policy lookup to indicate if packets should be offloaded or not,
> > the stack forwards packets to the device to do the magic.
> > 
> > Signed-off-by: Raed Salem <raeds@nvidia.com>
> > Signed-off-by: Huy Nguyen <huyn@nvidia.com>
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> >  net/xfrm/xfrm_device.c |  15 +++++-
> >  net/xfrm/xfrm_output.c |  12 ++++-
> >  net/xfrm/xfrm_state.c  | 120 +++++++++++++++++++++++++++++++++++++++--
> >  3 files changed, 141 insertions(+), 6 deletions(-)
> ...
> > @@ -1161,7 +1240,31 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
> >  			x = NULL;
> >  			goto out;
> >  		}
> > -
> > +#ifdef CONFIG_XFRM_OFFLOAD
> > +		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
> > +			struct xfrm_dev_offload *xdo = &pol->xdo;
> > +			struct xfrm_dev_offload *xso = &x->xso;
> > +
> > +			xso->type = XFRM_DEV_OFFLOAD_PACKET;
> > +			xso->dir = xdo->dir;
> > +			xso->dev = xdo->dev;
> > +			xso->real_dev = xdo->real_dev;
> > +			netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
> > +					     GFP_ATOMIC);
> > +			error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
> > +			if (error) {
> > +				xso->dir = 0;
> > +				netdev_put(xso->dev, &xso->dev_tracker);
> > +				xso->dev = NULL;
> > +				xso->real_dev = NULL;
> > +				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
> > +				x->km.state = XFRM_STATE_DEAD;
> > +				to_put = x;
> > +				x = NULL;
> > +				goto out;
> > +			}
> > +		}
> > +#endif
> >  		if (km_query(x, tmpl, pol) == 0) {
> >  			spin_lock_bh(&net->xfrm.xfrm_state_lock);
> >  			x->km.state = XFRM_STATE_ACQ;
> > @@ -1185,6 +1288,17 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
> >  			xfrm_hash_grow_check(net, x->bydst.next != NULL);
> >  			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
> >  		} else {
> > +#ifdef CONFIG_XFRM_OFFLOAD
> > +			struct xfrm_dev_offload *xso = &x->xso;
> > +
> > +			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
> > +				xso->dir = 0;
> > +				netdev_put(xso->dev, &xso->dev_tracker);
> > +				xso->dev = NULL;
> > +				xso->real_dev = NULL;
> > +				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
> > +			}
> 
> You do a xdo_dev_state_add call to add an acquire state to HW above.
> Maybe we should do a xdo_dev_state_del call here when deleting the
> acquire state.

Absolutely. Thanks

> 
> > +#endif
> >  			x->km.state = XFRM_STATE_DEAD;
> >  			to_put = x;
> >  			x = NULL;
> > -- 
> > 2.38.1
diff mbox series

Patch

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 8e18abc5016f..6affb3d1e204 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -120,6 +120,16 @@  struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
 	if (xo->flags & XFRM_GRO || x->xso.dir == XFRM_DEV_OFFLOAD_IN)
 		return skb;
 
+	/* The packet was sent to the HW IPsec packet offload engine,
+	 * but to the wrong device. Drop the packet, so it won't skip
+	 * the XFRM stack.
+	 */
+	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && x->xso.dev != dev) {
+		kfree_skb(skb);
+		dev_core_stats_tx_dropped_inc(dev);
+		return NULL;
+	}
+
 	/* This skb was already validated on the upper/virtual dev */
 	if ((x->xso.dev != dev) && (x->xso.real_dev == dev))
 		return skb;
@@ -385,8 +395,9 @@  bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	if (!x->type_offload || x->encap)
 		return false;
 
-	if ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
-	    (!xdst->child->xfrm)) {
+	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
+	    ((!dev || (dev == xfrm_dst_path(dst)->dev)) &&
+	     !xdst->child->xfrm)) {
 		mtu = xfrm_state_mtu(x, xdst->child_mtu_cached);
 		if (skb->len <= mtu)
 			goto ok;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 9a5e79a38c67..ce9e360a96e2 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -494,7 +494,7 @@  static int xfrm_output_one(struct sk_buff *skb, int err)
 	struct xfrm_state *x = dst->xfrm;
 	struct net *net = xs_net(x);
 
-	if (err <= 0)
+	if (err <= 0 || x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
 		goto resume;
 
 	do {
@@ -718,6 +718,16 @@  int xfrm_output(struct sock *sk, struct sk_buff *skb)
 		break;
 	}
 
+	if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) {
+		if (!xfrm_dev_offload_ok(skb, x)) {
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+			kfree_skb(skb);
+			return -EHOSTUNREACH;
+		}
+
+		return xfrm_output_resume(sk, skb, 0);
+	}
+
 	secpath_reset(skb);
 
 	if (xfrm_dev_offload_ok(skb, x)) {
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3d2fe7712ac5..22940bc0d4e4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -951,6 +951,49 @@  xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 	x->props.family = tmpl->encap_family;
 }
 
+static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
+						  const xfrm_address_t *daddr,
+						  __be32 spi, u8 proto,
+						  unsigned short family,
+						  struct xfrm_dev_offload *xdo)
+{	/* Look up a state in the byspi hash of @net that matches @family,
+	 * @spi, @proto and @daddr and whose mark accepts @mark, i.e.
+	 * (mark & x->mark.m) == x->mark.v.
+	 *
+	 * With CONFIG_XFRM_OFFLOAD the candidates are additionally filtered
+	 * by @xdo (it is not dereferenced otherwise):
+	 *  - @xdo->type is XFRM_DEV_OFFLOAD_PACKET: only packet-offloaded
+	 *    states bound to @xdo->dev are considered;
+	 *  - any other @xdo->type: packet-offloaded states are skipped.
+	 *
+	 * Returns the first matching state on which xfrm_state_hold_rcu()
+	 * succeeded, or NULL if there is none. A state for which the hold
+	 * fails is skipped and the walk continues.
+	 *
+	 * The bucket is walked with hlist_for_each_entry_rcu().
+	 * NOTE(review): presumably the caller must be inside an RCU
+	 * read-side critical section and must drop the returned reference
+	 * when done - confirm against the callers.
+	 */
+	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
+	struct xfrm_state *x;
+
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
+#ifdef CONFIG_XFRM_OFFLOAD
+		if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
+			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+				/* HW (packet offload) states are at the head
+				 * of the list, so the first state of another
+				 * type ends the search; there is no need to
+				 * iterate further.
+				 * NOTE(review): this relies on the insertion
+				 * code keeping HW states first; that code is
+				 * not visible here - confirm.
+				 */
+				break;
+
+			/* Packet offload: both the policy and the SA should
+			 * have the same device, so skip states offloaded to
+			 * a different one.
+			 */
+			if (xdo->dev != x->xso.dev)
+				continue;
+		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+			/* Skip HW (packet offload) states for SW lookups */
+			continue;
+#endif
+		if (x->props.family != family ||
+		    x->id.spi       != spi ||
+		    x->id.proto     != proto ||
+		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
+			continue;
+
+		if ((mark & x->mark.m) != x->mark.v)
+			continue;
+		if (!xfrm_state_hold_rcu(x))
+			continue;
+		return x;
+	}
+
+	return NULL;
+}
+
 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 					      const xfrm_address_t *daddr,
 					      __be32 spi, u8 proto,
@@ -1092,6 +1135,23 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	rcu_read_lock();
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
+#ifdef CONFIG_XFRM_OFFLOAD
+		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+				/* HW states are in the head of list, there is
+				 * no need to iterate further.
+				 */
+				break;
+
+			/* Packet offload: both policy and SA should
+			 * have same device.
+			 */
+			if (pol->xdo.dev != x->xso.dev)
+				continue;
+		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+			/* Skip HW states for SW lookups */
+			continue;
+#endif
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -1109,6 +1169,23 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 
 	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
+#ifdef CONFIG_XFRM_OFFLOAD
+		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+				/* HW states are in the head of list, there is
+				 * no need to iterate further.
+				 */
+				break;
+
+			/* Packet offload: both policy and SA should
+			 * have same device.
+			 */
+			if (pol->xdo.dev != x->xso.dev)
+				continue;
+		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+			/* Skip HW states for SW lookups */
+			continue;
+#endif
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -1126,8 +1203,10 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 	x = best;
 	if (!x && !error && !acquire_in_progress) {
 		if (tmpl->id.spi &&
-		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
-					      tmpl->id.proto, encap_family)) != NULL) {
+		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
+						  tmpl->id.spi, tmpl->id.proto,
+						  encap_family,
+						  &pol->xdo)) != NULL) {
 			to_put = x0;
 			error = -EEXIST;
 			goto out;
@@ -1161,7 +1240,31 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			x = NULL;
 			goto out;
 		}
-
+#ifdef CONFIG_XFRM_OFFLOAD
+		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
+			struct xfrm_dev_offload *xdo = &pol->xdo;
+			struct xfrm_dev_offload *xso = &x->xso;
+
+			xso->type = XFRM_DEV_OFFLOAD_PACKET;
+			xso->dir = xdo->dir;
+			xso->dev = xdo->dev;
+			xso->real_dev = xdo->real_dev;
+			netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
+					     GFP_ATOMIC);
+			error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
+			if (error) {
+				xso->dir = 0;
+				netdev_put(xso->dev, &xso->dev_tracker);
+				xso->dev = NULL;
+				xso->real_dev = NULL;
+				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+				x->km.state = XFRM_STATE_DEAD;
+				to_put = x;
+				x = NULL;
+				goto out;
+			}
+		}
+#endif
 		if (km_query(x, tmpl, pol) == 0) {
 			spin_lock_bh(&net->xfrm.xfrm_state_lock);
 			x->km.state = XFRM_STATE_ACQ;
@@ -1185,6 +1288,17 @@  xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
 			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 		} else {
+#ifdef CONFIG_XFRM_OFFLOAD
+			struct xfrm_dev_offload *xso = &x->xso;
+
+			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
+				xso->dir = 0;
+				netdev_put(xso->dev, &xso->dev_tracker);
+				xso->dev = NULL;
+				xso->real_dev = NULL;
+				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+			}
+#endif
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
 			x = NULL;