[bpf-next] xsk: save the undone skb

Message ID 8c251b09e29f5c36a824f73211a22e64460d4e4e.1607678556.git.xuanzhuo@linux.alibaba.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Series: [bpf-next] xsk: save the undone skb

Checks

Context                         Check    Description
netdev/cover_letter             success
netdev/fixes_present            success
netdev/patch_count              success
netdev/tree_selection           success  Clearly marked for bpf-next
netdev/subject_prefix           success
netdev/source_inline            success  Was 0 now: 0
netdev/verify_signedoff         success
netdev/module_param             success  Was 0 now: 0
netdev/build_32bit              success  Errors and warnings before: 67 this patch: 67
netdev/kdoc                     success  Errors and warnings before: 0 this patch: 0
netdev/verify_fixes             success
netdev/checkpatch               success  total: 0 errors, 0 warnings, 0 checks, 66 lines checked
netdev/build_allmodconfig_warn  success  Errors and warnings before: 53 this patch: 53
netdev/header_inline            success
netdev/stable                   success  Stable not CCed

Commit Message

Xuan Zhuo Dec. 11, 2020, 9:25 a.m. UTC
Save the skb when sending fails, that is, when __dev_direct_xmit()
returns NETDEV_TX_BUSY or when xskq_prod_reserve() fails. Once an skb
has been successfully allocated and initialized, it can be kept and
retried on the next call to xsk_generic_xmit() instead of being freed
when one of these errors is hit later in the path.

In particular, when NETDEV_TX_BUSY is returned, there is no longer any
need to handle the fact that xskq_prod_reserve() has already updated
the completion queue.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 include/net/xdp_sock.h |  3 +++
 net/xdp/xsk.c          | 36 +++++++++++++++++++++++++++---------
 2 files changed, 30 insertions(+), 9 deletions(-)
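
For orientation, the core of the change is the retry path added at the
top of xsk_generic_xmit(). The following is condensed from the diff
below with explanatory comments added; it is not a standalone
implementation:

	/* Retry path, condensed from the diff below.  xs->skb_undone holds
	 * an skb that a previous call built but could not send;
	 * xs->skb_undone_reserve is set when the matching completion queue
	 * slot still has to be reserved.
	 */
	if (xs->skb_undone) {
		if (xs->skb_undone_reserve) {
			if (xskq_prod_reserve(xs->pool->cq))
				goto out;	/* still no cq space, retry later */

			xs->skb_undone_reserve = false;
		}

		skb = xs->skb_undone;
		xs->skb_undone = NULL;
		goto xmit;		/* jump straight to __dev_direct_xmit() */
	}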

Comments

Magnus Karlsson Dec. 11, 2020, 3:32 p.m. UTC | #1
On Fri, Dec 11, 2020 at 2:12 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> Save the skb when sending fails, that is, when __dev_direct_xmit()
> returns NETDEV_TX_BUSY or when xskq_prod_reserve() fails. Once an skb
> has been successfully allocated and initialized, it can be kept and
> retried on the next call to xsk_generic_xmit() instead of being freed
> when one of these errors is hit later in the path.
>
> In particular, when NETDEV_TX_BUSY is returned, there is no longer any
> need to handle the fact that xskq_prod_reserve() has already updated
> the completion queue.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  include/net/xdp_sock.h |  3 +++
>  net/xdp/xsk.c          | 36 +++++++++++++++++++++++++++---------
>  2 files changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
> index 4f4e93b..fead0c9 100644
> --- a/include/net/xdp_sock.h
> +++ b/include/net/xdp_sock.h
> @@ -76,6 +76,9 @@ struct xdp_sock {
>         struct mutex mutex;
>         struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
>         struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
> +
> +       struct sk_buff *skb_undone;
> +       bool skb_undone_reserve;
>  };
>
>  #ifdef CONFIG_XDP_SOCKETS
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index e28c682..1051024 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -435,6 +435,19 @@ static int xsk_generic_xmit(struct sock *sk)
>         if (xs->queue_id >= xs->dev->real_num_tx_queues)
>                 goto out;
>
> +       if (xs->skb_undone) {
> +               if (xs->skb_undone_reserve) {
> +                       if (xskq_prod_reserve(xs->pool->cq))
> +                               goto out;
> +
> +                       xs->skb_undone_reserve = false;
> +               }
> +
> +               skb = xs->skb_undone;
> +               xs->skb_undone = NULL;
> +               goto xmit;
> +       }
> +
>         while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
>                 char *buffer;
>                 u64 addr;
> @@ -454,12 +467,7 @@ static int xsk_generic_xmit(struct sock *sk)
>                 addr = desc.addr;
>                 buffer = xsk_buff_raw_get_data(xs->pool, addr);
>                 err = skb_store_bits(skb, 0, buffer, len);
> -               /* This is the backpressure mechanism for the Tx path.
> -                * Reserve space in the completion queue and only proceed
> -                * if there is space in it. This avoids having to implement
> -                * any buffering in the Tx path.
> -                */
> -               if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
> +               if (unlikely(err)) {
>                         kfree_skb(skb);
>                         goto out;
>                 }
> @@ -470,12 +478,22 @@ static int xsk_generic_xmit(struct sock *sk)
>                 skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
>                 skb->destructor = xsk_destruct_skb;
>
> +               /* This is the backpressure mechanism for the Tx path.
> +                * Reserve space in the completion queue and only proceed
> +                * if there is space in it. This avoids having to implement
> +                * any buffering in the Tx path.
> +                */
> +               if (xskq_prod_reserve(xs->pool->cq)) {
> +                       xs->skb_undone_reserve = true;
> +                       xs->skb_undone = skb;
> +                       goto out;
> +               }
> +
> +xmit:

This will not work in the general case, since we cannot guarantee that
the application does not replace the packet in the Tx ring before it
calls send() again. That is fully legal. I also do not like introducing
state between calls; keeping this path stateless is much simpler and
therefore less error prone.
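
To make the concern concrete, here is a rough userspace sketch of the
sequence described above, using the libbpf xsk helpers. The variables
(Tx ring "tx", socket fd "fd", UMEM frame addresses frame_a/frame_b and
length "len") are hypothetical, and setup and error handling are
omitted:

	/* Assumes <bpf/xsk.h>, <sys/socket.h>, <errno.h> and an already
	 * bound AF_XDP socket whose Tx ring is "tx" and whose fd is "fd".
	 */
	__u32 idx;

	/* Post one descriptor pointing at frame_a and kick the kernel. */
	xsk_ring_prod__reserve(&tx, 1, &idx);
	*xsk_ring_prod__tx_desc(&tx, idx) =
		(struct xdp_desc){ .addr = frame_a, .len = len };
	xsk_ring_prod__submit(&tx, 1);

	if (sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0) < 0 && errno == EAGAIN) {
		/* Fully legal today: the descriptor has not been consumed,
		 * so the application may rewrite the same slot before it
		 * retries.  A saved skb, however, still carries the data
		 * already copied from frame_a, so the rewritten descriptor
		 * is not what would actually go out on the wire.
		 */
		xsk_ring_prod__tx_desc(&tx, idx)->addr = frame_b;
		sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
	}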

On the positive side, I will submit a patch that improves performance
of this transmit function by using the new batch interfaces I
introduced a month ago. With this patch I get a throughput improvement
of between 15 and 25% for the txpush benchmark in xdpsock. This is
much more than you will get from this patch. It also avoids the
problem you are addressing here completely. I will submit the patch
next week after the bug fix in this code has trickled down to
bpf-next. Hope you will like the throughput improvement that it
provides.

>                 err = __dev_direct_xmit(skb, xs->queue_id);
>                 if  (err == NETDEV_TX_BUSY) {
>                         /* Tell user-space to retry the send */
> -                       skb->destructor = sock_wfree;
> -                       /* Free skb without triggering the perf drop trace */
> -                       consume_skb(skb);
> +                       xs->skb_undone = skb;
>                         err = -EAGAIN;
>                         goto out;
>                 }
> --
> 1.8.3.1
>

Patch

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 4f4e93b..fead0c9 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -76,6 +76,9 @@  struct xdp_sock {
 	struct mutex mutex;
 	struct xsk_queue *fq_tmp; /* Only as tmp storage before bind */
 	struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */
+
+	struct sk_buff *skb_undone;
+	bool skb_undone_reserve;
 };
 
 #ifdef CONFIG_XDP_SOCKETS
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index e28c682..1051024 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -435,6 +435,19 @@  static int xsk_generic_xmit(struct sock *sk)
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
+	if (xs->skb_undone) {
+		if (xs->skb_undone_reserve) {
+			if (xskq_prod_reserve(xs->pool->cq))
+				goto out;
+
+			xs->skb_undone_reserve = false;
+		}
+
+		skb = xs->skb_undone;
+		xs->skb_undone = NULL;
+		goto xmit;
+	}
+
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
 		char *buffer;
 		u64 addr;
@@ -454,12 +467,7 @@  static int xsk_generic_xmit(struct sock *sk)
 		addr = desc.addr;
 		buffer = xsk_buff_raw_get_data(xs->pool, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
-		/* This is the backpressure mechanism for the Tx path.
-		 * Reserve space in the completion queue and only proceed
-		 * if there is space in it. This avoids having to implement
-		 * any buffering in the Tx path.
-		 */
-		if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
+		if (unlikely(err)) {
 			kfree_skb(skb);
 			goto out;
 		}
@@ -470,12 +478,22 @@  static int xsk_generic_xmit(struct sock *sk)
 		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
+		/* This is the backpressure mechanism for the Tx path.
+		 * Reserve space in the completion queue and only proceed
+		 * if there is space in it. This avoids having to implement
+		 * any buffering in the Tx path.
+		 */
+		if (xskq_prod_reserve(xs->pool->cq)) {
+			xs->skb_undone_reserve = true;
+			xs->skb_undone = skb;
+			goto out;
+		}
+
+xmit:
 		err = __dev_direct_xmit(skb, xs->queue_id);
 		if  (err == NETDEV_TX_BUSY) {
 			/* Tell user-space to retry the send */
-			skb->destructor = sock_wfree;
-			/* Free skb without triggering the perf drop trace */
-			consume_skb(skb);
+			xs->skb_undone = skb;
 			err = -EAGAIN;
 			goto out;
 		}