diff mbox series

[v2,1/3] net: tap: track dropped skb via kfree_skb_reason()

Message ID 20220219191246.4749-2-dongli.zhang@oracle.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tun/tap: use kfree_skb_reason() to trace dropped skb | expand

Checks

Context Check Description
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5979 this patch: 5979
netdev/cc_maintainers warning 7 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com dsahern@kernel.org keescook@chromium.org yhs@fb.com
netdev/build_clang success Errors and warnings before: 878 this patch: 878
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6130 this patch: 6130
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 112 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Dongli Zhang Feb. 19, 2022, 7:12 p.m. UTC
The TAP can be used as vhost-net backend. E.g., the tap_handle_frame() is
the interface to forward the skb from TAP to vhost-net/virtio-net.

However, there are many "goto drop" in the TAP driver. Therefore, the
kfree_skb_reason() is involved at each "goto drop" to help userspace
ftrace/ebpf to track the reason for the loss of packets.

The below reasons are introduced:

- SKB_DROP_REASON_SKB_CSUM
- SKB_DROP_REASON_SKB_COPY_DATA
- SKB_DROP_REASON_SKB_GSO_SEG
- SKB_DROP_REASON_DEV_HDR
- SKB_DROP_REASON_FULL_RING

Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
 drivers/net/tap.c          | 30 ++++++++++++++++++++++--------
 include/linux/skbuff.h     |  9 +++++++++
 include/trace/events/skb.h |  5 +++++
 3 files changed, 36 insertions(+), 8 deletions(-)

Comments

David Ahern Feb. 19, 2022, 10:46 p.m. UTC | #1
On 2/19/22 12:12 PM, Dongli Zhang wrote:
> The TAP can be used as vhost-net backend. E.g., the tap_handle_frame() is
> the interface to forward the skb from TAP to vhost-net/virtio-net.
> 
> However, there are many "goto drop" in the TAP driver. Therefore, the
> kfree_skb_reason() is involved at each "goto drop" to help userspace
> ftrace/ebpf to track the reason for the loss of packets.
> 
> The below reasons are introduced:
> 
> - SKB_DROP_REASON_SKB_CSUM
> - SKB_DROP_REASON_SKB_COPY_DATA
> - SKB_DROP_REASON_SKB_GSO_SEG
> - SKB_DROP_REASON_DEV_HDR
> - SKB_DROP_REASON_FULL_RING
> 
> Cc: Joao Martins <joao.m.martins@oracle.com>
> Cc: Joe Jin <joe.jin@oracle.com>
> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
> ---
>  drivers/net/tap.c          | 30 ++++++++++++++++++++++--------
>  include/linux/skbuff.h     |  9 +++++++++
>  include/trace/events/skb.h |  5 +++++
>  3 files changed, 36 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index 8e3a28ba6b28..ab3592506ef8 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -322,6 +322,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
>  	struct tap_dev *tap;
>  	struct tap_queue *q;
>  	netdev_features_t features = TAP_FEATURES;
> +	int drop_reason;

enum skb_drop_reason drop_reason;

>  
>  	tap = tap_dev_get_rcu(dev);
>  	if (!tap)
> @@ -343,12 +344,16 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
>  		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
>  		struct sk_buff *next;
>  
> -		if (IS_ERR(segs))
> +		if (IS_ERR(segs)) {
> +			drop_reason = SKB_DROP_REASON_SKB_GSO_SEG;
>  			goto drop;
> +		}
>  
>  		if (!segs) {
> -			if (ptr_ring_produce(&q->ring, skb))
> +			if (ptr_ring_produce(&q->ring, skb)) {
> +				drop_reason = SKB_DROP_REASON_FULL_RING;
>  				goto drop;
> +			}
>  			goto wake_up;
>  		}

you missed the walk of segs and calling ptr_ring_produce.

>
Dongli Zhang Feb. 20, 2022, 5:39 a.m. UTC | #2
Hi David,

On 2/19/22 2:46 PM, David Ahern wrote:
> On 2/19/22 12:12 PM, Dongli Zhang wrote:
>> The TAP can be used as vhost-net backend. E.g., the tap_handle_frame() is
>> the interface to forward the skb from TAP to vhost-net/virtio-net.
>>
>> However, there are many "goto drop" in the TAP driver. Therefore, the
>> kfree_skb_reason() is involved at each "goto drop" to help userspace
>> ftrace/ebpf to track the reason for the loss of packets.
>>
>> The below reasons are introduced:
>>
>> - SKB_DROP_REASON_SKB_CSUM
>> - SKB_DROP_REASON_SKB_COPY_DATA
>> - SKB_DROP_REASON_SKB_GSO_SEG
>> - SKB_DROP_REASON_DEV_HDR
>> - SKB_DROP_REASON_FULL_RING
>>
>> Cc: Joao Martins <joao.m.martins@oracle.com>
>> Cc: Joe Jin <joe.jin@oracle.com>
>> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
>> ---
>>  drivers/net/tap.c          | 30 ++++++++++++++++++++++--------
>>  include/linux/skbuff.h     |  9 +++++++++
>>  include/trace/events/skb.h |  5 +++++
>>  3 files changed, 36 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
>> index 8e3a28ba6b28..ab3592506ef8 100644
>> --- a/drivers/net/tap.c
>> +++ b/drivers/net/tap.c
>> @@ -322,6 +322,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
>>  	struct tap_dev *tap;
>>  	struct tap_queue *q;
>>  	netdev_features_t features = TAP_FEATURES;
>> +	int drop_reason;
> 
> enum skb_drop_reason drop_reason;

According to cscope, so far all 'drop_reason' are declared in type 'int' (e.g.,
ip_rcv_finish_core()).

I will change above to enum.

> 
>>  
>>  	tap = tap_dev_get_rcu(dev);
>>  	if (!tap)
>> @@ -343,12 +344,16 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
>>  		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
>>  		struct sk_buff *next;
>>  
>> -		if (IS_ERR(segs))
>> +		if (IS_ERR(segs)) {
>> +			drop_reason = SKB_DROP_REASON_SKB_GSO_SEG;
>>  			goto drop;
>> +		}
>>  
>>  		if (!segs) {
>> -			if (ptr_ring_produce(&q->ring, skb))
>> +			if (ptr_ring_produce(&q->ring, skb)) {
>> +				drop_reason = SKB_DROP_REASON_FULL_RING;
>>  				goto drop;
>> +			}
>>  			goto wake_up;
>>  		}
> 
> you missed the walk of segs and calling ptr_ring_produce.

This call site of kfree_skb() and kfree_skb_list() is unique and there is not
any "goto drop" involved. From developers' view, we will be able to tell if the
'drop' is at here according to the instruction pointers in callstack.

 360                 consume_skb(skb);
 361                 skb_list_walk_safe(segs, skb, next) {
 362                         skb_mark_not_on_list(skb);
 363                         if (ptr_ring_produce(&q->ring, skb)) {
 364                                 kfree_skb(skb);
 365                                 kfree_skb_list(next);
 366                                 break;
 367                         }
 368                 }

I will add the reason to "walk of segs" case as well. About kfree_skb_list(), I
will introduce a kfree_skb_list_reason().

Thank you very much!

Dongli Zhang
diff mbox series

Patch

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 8e3a28ba6b28..ab3592506ef8 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -322,6 +322,7 @@  rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 	struct tap_dev *tap;
 	struct tap_queue *q;
 	netdev_features_t features = TAP_FEATURES;
+	int drop_reason;
 
 	tap = tap_dev_get_rcu(dev);
 	if (!tap)
@@ -343,12 +344,16 @@  rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
 		struct sk_buff *next;
 
-		if (IS_ERR(segs))
+		if (IS_ERR(segs)) {
+			drop_reason = SKB_DROP_REASON_SKB_GSO_SEG;
 			goto drop;
+		}
 
 		if (!segs) {
-			if (ptr_ring_produce(&q->ring, skb))
+			if (ptr_ring_produce(&q->ring, skb)) {
+				drop_reason = SKB_DROP_REASON_FULL_RING;
 				goto drop;
+			}
 			goto wake_up;
 		}
 
@@ -369,10 +374,14 @@  rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 		 */
 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
 		    !(features & NETIF_F_CSUM_MASK) &&
-		    skb_checksum_help(skb))
+		    skb_checksum_help(skb)) {
+			drop_reason = SKB_DROP_REASON_SKB_CSUM;
 			goto drop;
-		if (ptr_ring_produce(&q->ring, skb))
+		}
+		if (ptr_ring_produce(&q->ring, skb)) {
+			drop_reason = SKB_DROP_REASON_FULL_RING;
 			goto drop;
+		}
 	}
 
 wake_up:
@@ -383,7 +392,7 @@  rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 	/* Count errors/drops only here, thus don't care about args. */
 	if (tap->count_rx_dropped)
 		tap->count_rx_dropped(tap);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 	return RX_HANDLER_CONSUMED;
 }
 EXPORT_SYMBOL_GPL(tap_handle_frame);
@@ -632,6 +641,7 @@  static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 	int depth;
 	bool zerocopy = false;
 	size_t linear;
+	int drop_reason;
 
 	if (q->flags & IFF_VNET_HDR) {
 		vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
@@ -696,8 +706,10 @@  static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 	else
 		err = skb_copy_datagram_from_iter(skb, 0, from, len);
 
-	if (err)
+	if (err) {
+		drop_reason = SKB_DROP_REASON_SKB_COPY_DATA;
 		goto err_kfree;
+	}
 
 	skb_set_network_header(skb, ETH_HLEN);
 	skb_reset_mac_header(skb);
@@ -706,8 +718,10 @@  static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 	if (vnet_hdr_len) {
 		err = virtio_net_hdr_to_skb(skb, &vnet_hdr,
 					    tap_is_little_endian(q));
-		if (err)
+		if (err) {
+			drop_reason = SKB_DROP_REASON_DEV_HDR;
 			goto err_kfree;
+		}
 	}
 
 	skb_probe_transport_header(skb);
@@ -738,7 +752,7 @@  static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 	return total_len;
 
 err_kfree:
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 
 err:
 	rcu_read_lock();
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a5adbf6b51e8..218f7ba753e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -346,6 +346,15 @@  enum skb_drop_reason {
 					 * udp packet drop out of
 					 * udp_memory_allocated.
 					 */
+	SKB_DROP_REASON_SKB_CSUM,	/* sk_buff checksum error */
+	SKB_DROP_REASON_SKB_COPY_DATA,	/* failed to copy data from or to
+					 * sk_buff
+					 */
+	SKB_DROP_REASON_SKB_GSO_SEG,	/* gso segmentation error */
+	SKB_DROP_REASON_DEV_HDR,	/* there is something wrong with
+					 * device driver specific header
+					 */
+	SKB_DROP_REASON_FULL_RING,	/* ring buffer is full */
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index cfcfd26399f7..842020d532f2 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -27,6 +27,11 @@ 
 	EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO)		\
 	EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF)	\
 	EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM)		\
+	EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM)			\
+	EM(SKB_DROP_REASON_SKB_COPY_DATA, SKB_COPY_DATA)	\
+	EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG)		\
+	EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR)			\
+	EM(SKB_DROP_REASON_FULL_RING, FULL_RING)		\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM