diff mbox series

[net-next,v3,4/4] net: tun: track dropped skb via kfree_skb_reason()

Message ID 20220221053440.7320-5-dongli.zhang@oracle.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series tun/tap: use kfree_skb_reason() to trace dropped skb | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5979 this patch: 5979
netdev/cc_maintainers warning 9 maintainers not CCed: flyingpeng@tencent.com kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com mengensun@tencent.com dsahern@kernel.org keescook@chromium.org yhs@fb.com
netdev/build_clang success Errors and warnings before: 878 this patch: 878
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6130 this patch: 6130
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 135 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Dongli Zhang Feb. 21, 2022, 5:34 a.m. UTC
The TUN can be used as vhost-net backend. E.g, the tun_net_xmit() is the
interface to forward the skb from TUN to vhost-net/virtio-net.

However, there are many "goto drop" in the TUN driver. Therefore, the
kfree_skb_reason() is involved at each "goto drop" to help userspace
ftrace/ebpf to track the reason for the loss of packets.

The below reasons are introduced:

- SKB_DROP_REASON_SKB_PULL
- SKB_DROP_REASON_SKB_TRIM
- SKB_DROP_REASON_DEV_READY
- SKB_DROP_REASON_DEV_FILTER
- SKB_DROP_REASON_BPF_FILTER

Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
Changed since v1:
- revise the reason name
Changed since v2:
- declare drop_reason as type "enum skb_drop_reason"

 drivers/net/tun.c          | 37 ++++++++++++++++++++++++++++---------
 include/linux/skbuff.h     |  7 +++++++
 include/trace/events/skb.h |  5 +++++
 3 files changed, 40 insertions(+), 9 deletions(-)

Comments

David Ahern Feb. 22, 2022, 3:28 a.m. UTC | #1
On 2/20/22 10:34 PM, Dongli Zhang wrote:
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index aa27268..bf7d8cd 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -1062,13 +1062,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  	struct netdev_queue *queue;
>  	struct tun_file *tfile;
>  	int len = skb->len;
> +	enum skb_drop_reason drop_reason;

this function is already honoring reverse xmas tree style, so this needs
to be moved up.

>  
>  	rcu_read_lock();
>  	tfile = rcu_dereference(tun->tfiles[txq]);
>  
>  	/* Drop packet if interface is not attached */
> -	if (!tfile)
> +	if (!tfile) {
> +		drop_reason = SKB_DROP_REASON_DEV_READY;
>  		goto drop;
> +	}
>  
>  	if (!rcu_dereference(tun->steering_prog))
>  		tun_automq_xmit(tun, skb);
> @@ -1078,22 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>  	/* Drop if the filter does not like it.
>  	 * This is a noop if the filter is disabled.
>  	 * Filter can be enabled only for the TAP devices. */
> -	if (!check_filter(&tun->txflt, skb))
> +	if (!check_filter(&tun->txflt, skb)) {
> +		drop_reason = SKB_DROP_REASON_DEV_FILTER;
>  		goto drop;
> +	}
>  
>  	if (tfile->socket.sk->sk_filter &&
> -	    sk_filter(tfile->socket.sk, skb))
> +	    sk_filter(tfile->socket.sk, skb)) {
> +		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
>  		goto drop;
> +	}
>  
>  	len = run_ebpf_filter(tun, skb, len);
> -	if (len == 0)
> +	if (len == 0) {
> +		drop_reason = SKB_DROP_REASON_BPF_FILTER;

how does this bpf filter differ from SKB_DROP_REASON_SOCKET_FILTER? I
think the reason code needs to be a little clearer on the distinction.
Dongli Zhang Feb. 22, 2022, 4:45 a.m. UTC | #2
Hi David,

On 2/21/22 7:28 PM, David Ahern wrote:
> On 2/20/22 10:34 PM, Dongli Zhang wrote:
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index aa27268..bf7d8cd 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -1062,13 +1062,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	struct netdev_queue *queue;
>>  	struct tun_file *tfile;
>>  	int len = skb->len;
>> +	enum skb_drop_reason drop_reason;
> 
> this function is already honoring reverse xmas tree style, so this needs
> to be moved up.

I will move this up to before "int txq = skb->queue_mapping;".

> 
>>  
>>  	rcu_read_lock();
>>  	tfile = rcu_dereference(tun->tfiles[txq]);
>>  
>>  	/* Drop packet if interface is not attached */
>> -	if (!tfile)
>> +	if (!tfile) {
>> +		drop_reason = SKB_DROP_REASON_DEV_READY;
>>  		goto drop;
>> +	}
>>  
>>  	if (!rcu_dereference(tun->steering_prog))
>>  		tun_automq_xmit(tun, skb);
>> @@ -1078,22 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>  	/* Drop if the filter does not like it.
>>  	 * This is a noop if the filter is disabled.
>>  	 * Filter can be enabled only for the TAP devices. */
>> -	if (!check_filter(&tun->txflt, skb))
>> +	if (!check_filter(&tun->txflt, skb)) {
>> +		drop_reason = SKB_DROP_REASON_DEV_FILTER;
>>  		goto drop;
>> +	}
>>  
>>  	if (tfile->socket.sk->sk_filter &&
>> -	    sk_filter(tfile->socket.sk, skb))
>> +	    sk_filter(tfile->socket.sk, skb)) {
>> +		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
>>  		goto drop;
>> +	}
>>  
>>  	len = run_ebpf_filter(tun, skb, len);
>> -	if (len == 0)
>> +	if (len == 0) {
>> +		drop_reason = SKB_DROP_REASON_BPF_FILTER;
> 
> how does this bpf filter differ from SKB_DROP_REASON_SOCKET_FILTER? I
> think the reason code needs to be a little clearer on the distinction.
> 


While there is a diff between BPF_FILTER (here) and SOCKET_FILTER ...

... indeed the issue is: there is NO diff between BPF_FILTER (here) and
DEV_FILTER (introduced by the patch).


The run_ebpf_filter() is to run the bpf filter attached to the TUN device (not
socket). This is similar to DEV_FILTER, which is to run a device specific filter.

Initially, I would use DEV_FILTER at both locations. This makes trouble to me as
there would be two places with same reason=DEV_FILTER. I will not be able to
tell where the skb is dropped.


I was thinking about to introduce a SKB_DROP_REASON_DEV_BPF. While I have
limited experience in device specific bpf, the TUN is the only device I know
that has a device specific ebpf filter (by commit aff3d70a07ff ("tun: allow to
attach ebpf socket filter")). The SKB_DROP_REASON_DEV_BPF is not generic enough
to be re-used by other drivers.


Would you mind sharing your suggestion if I would re-use (1)
SKB_DROP_REASON_DEV_FILTER or (2) introduce a new SKB_DROP_REASON_DEV_BPF, which
is for sk_buff dropped by ebpf attached to device (not socket).


To answer your question, the SOCKET_FILTER is for filter attached to socket, the
BPF_FILTER was supposed for ebpf filter attached to device (tun->filter_prog).


Thank you very much!

Dongli Zhang
David Ahern Feb. 22, 2022, 2:39 p.m. UTC | #3
On 2/21/22 9:45 PM, Dongli Zhang wrote:
> Hi David,
> 
> On 2/21/22 7:28 PM, David Ahern wrote:
>> On 2/20/22 10:34 PM, Dongli Zhang wrote:
>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>> index aa27268..bf7d8cd 100644
>>> --- a/drivers/net/tun.c
>>> +++ b/drivers/net/tun.c
>>> @@ -1062,13 +1062,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>>  	struct netdev_queue *queue;
>>>  	struct tun_file *tfile;
>>>  	int len = skb->len;
>>> +	enum skb_drop_reason drop_reason;
>>
>> this function is already honoring reverse xmas tree style, so this needs
>> to be moved up.
> 
> I will move this up to before "int txq = skb->queue_mapping;".
> 
>>
>>>  
>>>  	rcu_read_lock();
>>>  	tfile = rcu_dereference(tun->tfiles[txq]);
>>>  
>>>  	/* Drop packet if interface is not attached */
>>> -	if (!tfile)
>>> +	if (!tfile) {
>>> +		drop_reason = SKB_DROP_REASON_DEV_READY;
>>>  		goto drop;
>>> +	}
>>>  
>>>  	if (!rcu_dereference(tun->steering_prog))
>>>  		tun_automq_xmit(tun, skb);
>>> @@ -1078,22 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>>  	/* Drop if the filter does not like it.
>>>  	 * This is a noop if the filter is disabled.
>>>  	 * Filter can be enabled only for the TAP devices. */
>>> -	if (!check_filter(&tun->txflt, skb))
>>> +	if (!check_filter(&tun->txflt, skb)) {
>>> +		drop_reason = SKB_DROP_REASON_DEV_FILTER;
>>>  		goto drop;
>>> +	}
>>>  
>>>  	if (tfile->socket.sk->sk_filter &&
>>> -	    sk_filter(tfile->socket.sk, skb))
>>> +	    sk_filter(tfile->socket.sk, skb)) {
>>> +		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
>>>  		goto drop;
>>> +	}
>>>  
>>>  	len = run_ebpf_filter(tun, skb, len);
>>> -	if (len == 0)
>>> +	if (len == 0) {
>>> +		drop_reason = SKB_DROP_REASON_BPF_FILTER;
>>
>> how does this bpf filter differ from SKB_DROP_REASON_SOCKET_FILTER? I
>> think the reason code needs to be a little clearer on the distinction.
>>
> 
> 
> While there is a diff between BPF_FILTER (here) and SOCKET_FILTER ...
> 
> ... indeed the issue is: there is NO diff between BPF_FILTER (here) and
> DEV_FILTER (introduced by the patch).
> 
> 
> The run_ebpf_filter() is to run the bpf filter attached to the TUN device (not
> socket). This is similar to DEV_FILTER, which is to run a device specific filter.
> 
> Initially, I would use DEV_FILTER at both locations. This makes trouble to me as
> there would be two places with same reason=DEV_FILTER. I will not be able to
> tell where the skb is dropped.
> 
> 
> I was thinking about to introduce a SKB_DROP_REASON_DEV_BPF. While I have
> limited experience in device specific bpf, the TUN is the only device I know
> that has a device specific ebpf filter (by commit aff3d70a07ff ("tun: allow to
> attach ebpf socket filter")). The SKB_DROP_REASON_DEV_BPF is not generic enough
> to be re-used by other drivers.
> 
> 
> Would you mind sharing your suggestion if I would re-use (1)
> SKB_DROP_REASON_DEV_FILTER or (2) introduce a new SKB_DROP_REASON_DEV_BPF, which
> is for sk_buff dropped by ebpf attached to device (not socket).
> 
> 
> To answer your question, the SOCKET_FILTER is for filter attached to socket, the
> BPF_FILTER was supposed for ebpf filter attached to device (tun->filter_prog).
> 
> 

tun/tap does have some unique filtering options. The other sets focused
on the core networking stack is adding a drop reason of
SKB_DROP_REASON_BPF_CGROUP_EGRESS for cgroup based egress filters.

For tun unique filters, how about using a shortened version of the ioctl
name used to set the filter.
Dongli Zhang Feb. 22, 2022, 6:20 p.m. UTC | #4
Hi David,

On 2/22/22 6:39 AM, David Ahern wrote:
> On 2/21/22 9:45 PM, Dongli Zhang wrote:
>> Hi David,
>>
>> On 2/21/22 7:28 PM, David Ahern wrote:
>>> On 2/20/22 10:34 PM, Dongli Zhang wrote:
>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>> index aa27268..bf7d8cd 100644
>>>> --- a/drivers/net/tun.c
>>>> +++ b/drivers/net/tun.c
>>>> @@ -1062,13 +1062,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>>>  	struct netdev_queue *queue;
>>>>  	struct tun_file *tfile;
>>>>  	int len = skb->len;
>>>> +	enum skb_drop_reason drop_reason;
>>>
>>> this function is already honoring reverse xmas tree style, so this needs
>>> to be moved up.
>>
>> I will move this up to before "int txq = skb->queue_mapping;".
>>
>>>
>>>>  
>>>>  	rcu_read_lock();
>>>>  	tfile = rcu_dereference(tun->tfiles[txq]);
>>>>  
>>>>  	/* Drop packet if interface is not attached */
>>>> -	if (!tfile)
>>>> +	if (!tfile) {
>>>> +		drop_reason = SKB_DROP_REASON_DEV_READY;
>>>>  		goto drop;
>>>> +	}
>>>>  
>>>>  	if (!rcu_dereference(tun->steering_prog))
>>>>  		tun_automq_xmit(tun, skb);
>>>> @@ -1078,22 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>>>  	/* Drop if the filter does not like it.
>>>>  	 * This is a noop if the filter is disabled.
>>>>  	 * Filter can be enabled only for the TAP devices. */
>>>> -	if (!check_filter(&tun->txflt, skb))
>>>> +	if (!check_filter(&tun->txflt, skb)) {
>>>> +		drop_reason = SKB_DROP_REASON_DEV_FILTER;
>>>>  		goto drop;
>>>> +	}
>>>>  
>>>>  	if (tfile->socket.sk->sk_filter &&
>>>> -	    sk_filter(tfile->socket.sk, skb))
>>>> +	    sk_filter(tfile->socket.sk, skb)) {
>>>> +		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
>>>>  		goto drop;
>>>> +	}
>>>>  
>>>>  	len = run_ebpf_filter(tun, skb, len);
>>>> -	if (len == 0)
>>>> +	if (len == 0) {
>>>> +		drop_reason = SKB_DROP_REASON_BPF_FILTER;
>>>
>>> how does this bpf filter differ from SKB_DROP_REASON_SOCKET_FILTER? I
>>> think the reason code needs to be a little clearer on the distinction.
>>>
>>
>>
>> While there is a diff between BPF_FILTER (here) and SOCKET_FILTER ...
>>
>> ... indeed the issue is: there is NO diff between BPF_FILTER (here) and
>> DEV_FILTER (introduced by the patch).
>>
>>
>> The run_ebpf_filter() is to run the bpf filter attached to the TUN device (not
>> socket). This is similar to DEV_FILTER, which is to run a device specific filter.
>>
>> Initially, I would use DEV_FILTER at both locations. This makes trouble to me as
>> there would be two places with same reason=DEV_FILTER. I will not be able to
>> tell where the skb is dropped.
>>
>>
>> I was thinking about to introduce a SKB_DROP_REASON_DEV_BPF. While I have
>> limited experience in device specific bpf, the TUN is the only device I know
>> that has a device specific ebpf filter (by commit aff3d70a07ff ("tun: allow to
>> attach ebpf socket filter")). The SKB_DROP_REASON_DEV_BPF is not generic enough
>> to be re-used by other drivers.
>>
>>
>> Would you mind sharing your suggestion if I would re-use (1)
>> SKB_DROP_REASON_DEV_FILTER or (2) introduce a new SKB_DROP_REASON_DEV_BPF, which
>> is for sk_buff dropped by ebpf attached to device (not socket).
>>
>>
>> To answer your question, the SOCKET_FILTER is for filter attached to socket, the
>> BPF_FILTER was supposed for ebpf filter attached to device (tun->filter_prog).
>>
>>
> 
> tun/tap does have some unique filtering options. The other sets focused
> on the core networking stack is adding a drop reason of
> SKB_DROP_REASON_BPF_CGROUP_EGRESS for cgroup based egress filters.

Thank you for the explanation!

> 
> For tun unique filters, how about using a shortened version of the ioctl
> name used to set the filter.
> 

Although TUN is widely used in virtualization environment, it is only one of
many drivers. I prefer to not introduce a reason that can be used only by a
specific driver.

In order to make it more generic and more re-usable (e.g., perhaps people may
add ebpf filter to TAP driver as well), how about we create below reasons.

SKB_DROP_REASON_DEV_FILTER,     /* dropped by filter attached to
				 * or directly implemented by a
				 * specific driver
				 */
SKB_DROP_REASON_BPF_DEV,	/* dropped by bpf directly
				 * attached to a specific device,
				 * e.g., via TUNSETFILTEREBPF
				 */

We already use SKB_DROP_REASON_DEV_FILTER in this patchset. We will use
SKB_DROP_REASON_BPF_DEV for the ebpf filter attached to TUN.

Thank you very much!

Dongli Zhang
Menglong Dong Feb. 25, 2022, 5:57 a.m. UTC | #5
>Hi David,
>
>On 2/22/22 6:39 AM, David Ahern wrote:
>> On 2/21/22 9:45 PM, Dongli Zhang wrote:
>>> Hi David,
>>>
>>> On 2/21/22 7:28 PM, David Ahern wrote:
>>>> On 2/20/22 10:34 PM, Dongli Zhang wrote:
>>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>>> index aa27268..bf7d8cd 100644
>>>>> --- a/drivers/net/tun.c
>>>>> +++ b/drivers/net/tun.c
>>>>> @@ -1062,13 +1062,16 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>>>>  	struct netdev_queue *queue;
>>>>>  	struct tun_file *tfile;
>>>>>  	int len = skb->len;
>>>>> +	enum skb_drop_reason drop_reason;
>>>>
>>>> this function is already honoring reverse xmas tree style, so this needs
>>>> to be moved up.
>>>
>>> I will move this up to before "int txq = skb->queue_mapping;".
>>>
>>>>
[...]
>>>>
>>>
>>>
>>> While there is a diff between BPF_FILTER (here) and SOCKET_FILTER ...
>>>
>>> ... indeed the issue is: there is NO diff between BPF_FILTER (here) and
>>> DEV_FILTER (introduced by the patch).
>>>
>>>
>>> The run_ebpf_filter() is to run the bpf filter attached to the TUN device (not
>>> socket). This is similar to DEV_FILTER, which is to run a device specific filter.
>>>
>>> Initially, I would use DEV_FILTER at both locations. This makes trouble to me as
>>> there would be two places with same reason=DEV_FILTER. I will not be able to
>>> tell where the skb is dropped.
>>>
>>>
>>> I was thinking about to introduce a SKB_DROP_REASON_DEV_BPF. While I have
>>> limited experience in device specific bpf, the TUN is the only device I know
>>> that has a device specific ebpf filter (by commit aff3d70a07ff ("tun: allow to
>>> attach ebpf socket filter")). The SKB_DROP_REASON_DEV_BPF is not generic enough
>>> to be re-used by other drivers.
>>>
>>>
>>> Would you mind sharing your suggestion if I would re-use (1)
>>> SKB_DROP_REASON_DEV_FILTER or (2) introduce a new SKB_DROP_REASON_DEV_BPF, which
>>> is for sk_buff dropped by ebpf attached to device (not socket).
>>>
>>>
>>> To answer your question, the SOCKET_FILTER is for filter attached to socket, the
>>> BPF_FILTER was supposed for ebpf filter attached to device (tun->filter_prog).
>>>
>>>
>> 
>> tun/tap does have some unique filtering options. The other sets focused
>> on the core networking stack is adding a drop reason of
>> SKB_DROP_REASON_BPF_CGROUP_EGRESS for cgroup based egress filters.
>
>Thank you for the explanation!
>
>> 
>> For tun unique filters, how about using a shortened version of the ioctl
>> name used to set the filter.
>> 
>
>Although TUN is widely used in virtualization environment, it is only one of
>many drivers. I prefer to not introduce a reason that can be used only by a
>specific driver.
>
>In order to make it more generic and more re-usable (e.g., perhaps people may
>add ebpf filter to TAP driver as well), how about we create below reasons.
>
>SKB_DROP_REASON_DEV_FILTER,     /* dropped by filter attached to
>				 * or directly implemented by a
>				 * specific driver
>				 */
>SKB_DROP_REASON_BPF_DEV,	/* dropped by bpf directly
>				 * attached to a specific device,
>				 * e.g., via TUNSETFILTEREBPF
>				 */

Aren't DEV_FILTER and BPF_DEV too generic? eBPF atached to netdev can
be many kinds, such as XDP, TC, etc.

I think that use TAP_TXFILTER instaed of DEV_FILTER maybe better?
and TAP_FILTER->BPF_DEV. Make them similar to the name in
__tun_chr_ioctl() may be easier for user to understand.

>
>We already use SKB_DROP_REASON_DEV_FILTER in this patchset. We will use
>SKB_DROP_REASON_BPF_DEV for the ebpf filter attached to TUN.
>
>Thank you very much!
>
>Dongli Zhang
David Ahern Feb. 25, 2022, 3:48 p.m. UTC | #6
On 2/24/22 10:57 PM, Menglong Dong wrote:
>>>
>>> For tun unique filters, how about using a shortened version of the ioctl
>>> name used to set the filter.
>>>
>>
>> Although TUN is widely used in virtualization environment, it is only one of
>> many drivers. I prefer to not introduce a reason that can be used only by a
>> specific driver.
>>
>> In order to make it more generic and more re-usable (e.g., perhaps people may
>> add ebpf filter to TAP driver as well), how about we create below reasons.
>>
>> SKB_DROP_REASON_DEV_FILTER,     /* dropped by filter attached to
>> 				 * or directly implemented by a
>> 				 * specific driver
>> 				 */
>> SKB_DROP_REASON_BPF_DEV,	/* dropped by bpf directly
>> 				 * attached to a specific device,
>> 				 * e.g., via TUNSETFILTEREBPF
>> 				 */
> 
> Aren't DEV_FILTER and BPF_DEV too generic? eBPF atached to netdev can
> be many kinds, such as XDP, TC, etc.

yes.

> 
> I think that use TAP_TXFILTER instaed of DEV_FILTER maybe better?
> and TAP_FILTER->BPF_DEV. Make them similar to the name in
> __tun_chr_ioctl() may be easier for user to understand.
> 

in this case given the unique attach points and API tap in the name
seems more appropriate
Dongli Zhang Feb. 25, 2022, 4:49 p.m. UTC | #7
Hi David and Menglong,

On 2/25/22 7:48 AM, David Ahern wrote:
> On 2/24/22 10:57 PM, Menglong Dong wrote:
>>>>
>>>> For tun unique filters, how about using a shortened version of the ioctl
>>>> name used to set the filter.
>>>>
>>>
>>> Although TUN is widely used in virtualization environment, it is only one of
>>> many drivers. I prefer to not introduce a reason that can be used only by a
>>> specific driver.
>>>
>>> In order to make it more generic and more re-usable (e.g., perhaps people may
>>> add ebpf filter to TAP driver as well), how about we create below reasons.
>>>
>>> SKB_DROP_REASON_DEV_FILTER,     /* dropped by filter attached to
>>> 				 * or directly implemented by a
>>> 				 * specific driver
>>> 				 */
>>> SKB_DROP_REASON_BPF_DEV,	/* dropped by bpf directly
>>> 				 * attached to a specific device,
>>> 				 * e.g., via TUNSETFILTEREBPF
>>> 				 */
>>
>> Aren't DEV_FILTER and BPF_DEV too generic? eBPF atached to netdev can
>> be many kinds, such as XDP, TC, etc.
> 
> yes.
> 
>>
>> I think that use TAP_TXFILTER instaed of DEV_FILTER maybe better?
>> and TAP_FILTER->BPF_DEV. Make them similar to the name in
>> __tun_chr_ioctl() may be easier for user to understand.
>>
> 
> in this case given the unique attach points and API tap in the name
> seems more appropriate
> 

Thank you very much for the suggestions.

I will add below in the next version.

SKB_DROP_REASON_TAP_TXFILTER,	/* dropped by tx filter implemented at
				 * tun/tap, e.g., check_filter()
				 */
SKB_DROP_REASON_TAP_FILTER,	/* dropped by (ebpf) filter directly
				 * attached to tun/tap, e.g., via
				 * TUNSETFILTEREBPF
				 */

Dongli Zhang
diff mbox series

Patch

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index aa27268..bf7d8cd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1062,13 +1062,16 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct netdev_queue *queue;
 	struct tun_file *tfile;
 	int len = skb->len;
+	enum skb_drop_reason drop_reason;
 
 	rcu_read_lock();
 	tfile = rcu_dereference(tun->tfiles[txq]);
 
 	/* Drop packet if interface is not attached */
-	if (!tfile)
+	if (!tfile) {
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
+	}
 
 	if (!rcu_dereference(tun->steering_prog))
 		tun_automq_xmit(tun, skb);
@@ -1078,22 +1081,32 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Drop if the filter does not like it.
 	 * This is a noop if the filter is disabled.
 	 * Filter can be enabled only for the TAP devices. */
-	if (!check_filter(&tun->txflt, skb))
+	if (!check_filter(&tun->txflt, skb)) {
+		drop_reason = SKB_DROP_REASON_DEV_FILTER;
 		goto drop;
+	}
 
 	if (tfile->socket.sk->sk_filter &&
-	    sk_filter(tfile->socket.sk, skb))
+	    sk_filter(tfile->socket.sk, skb)) {
+		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
 		goto drop;
+	}
 
 	len = run_ebpf_filter(tun, skb, len);
-	if (len == 0)
+	if (len == 0) {
+		drop_reason = SKB_DROP_REASON_BPF_FILTER;
 		goto drop;
+	}
 
-	if (pskb_trim(skb, len))
+	if (pskb_trim(skb, len)) {
+		drop_reason = SKB_DROP_REASON_SKB_TRIM;
 		goto drop;
+	}
 
-	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
+	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) {
+		drop_reason = SKB_DROP_REASON_SKB_COPY_DATA;
 		goto drop;
+	}
 
 	skb_tx_timestamp(skb);
 
@@ -1104,8 +1117,10 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset_ct(skb);
 
-	if (ptr_ring_produce(&tfile->tx_ring, skb))
+	if (ptr_ring_produce(&tfile->tx_ring, skb)) {
+		drop_reason = SKB_DROP_REASON_FULL_RING;
 		goto drop;
+	}
 
 	/* NETIF_F_LLTX requires to do our own update of trans_start */
 	queue = netdev_get_tx_queue(dev, txq);
@@ -1122,7 +1137,7 @@  static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 drop:
 	atomic_long_inc(&dev->tx_dropped);
 	skb_tx_error(skb);
-	kfree_skb(skb);
+	kfree_skb_reason(skb, drop_reason);
 	rcu_read_unlock();
 	return NET_XMIT_DROP;
 }
@@ -1720,6 +1735,7 @@  static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	u32 rxhash = 0;
 	int skb_xdp = 1;
 	bool frags = tun_napi_frags_enabled(tfile);
+	enum skb_drop_reason drop_reason;
 
 	if (!(tun->flags & IFF_NO_PI)) {
 		if (len < sizeof(pi))
@@ -1823,9 +1839,10 @@  static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 
 		if (err) {
 			err = -EFAULT;
+			drop_reason = SKB_DROP_REASON_SKB_COPY_DATA;
 drop:
 			atomic_long_inc(&tun->dev->rx_dropped);
-			kfree_skb(skb);
+			kfree_skb_reason(skb, drop_reason);
 			if (frags) {
 				tfile->napi.skb = NULL;
 				mutex_unlock(&tfile->napi_mutex);
@@ -1872,6 +1889,7 @@  static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	case IFF_TAP:
 		if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
 			err = -ENOMEM;
+			drop_reason = SKB_DROP_REASON_SKB_PULL;
 			goto drop;
 		}
 		skb->protocol = eth_type_trans(skb, tun->dev);
@@ -1925,6 +1943,7 @@  static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	if (unlikely(!(tun->dev->flags & IFF_UP))) {
 		err = -EIO;
 		rcu_read_unlock();
+		drop_reason = SKB_DROP_REASON_DEV_READY;
 		goto drop;
 	}
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 52550c7..5850590 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -385,10 +385,17 @@  enum skb_drop_reason {
 					 * sk_buff
 					 */
 	SKB_DROP_REASON_SKB_GSO_SEG,	/* gso segmentation error */
+	SKB_DROP_REASON_SKB_PULL,	/* failed to pull sk_buff data */
+	SKB_DROP_REASON_SKB_TRIM,	/* failed to trim sk_buff data */
 	SKB_DROP_REASON_DEV_HDR,	/* there is something wrong with
 					 * device driver specific header
 					 */
+	SKB_DROP_REASON_DEV_READY,	/* device is not ready */
+	SKB_DROP_REASON_DEV_FILTER,	/* dropped by device driver
+					 * specific filter
+					 */
 	SKB_DROP_REASON_FULL_RING,	/* ring buffer is full */
+	SKB_DROP_REASON_BPF_FILTER,	/* dropped by ebpf filter */
 	SKB_DROP_REASON_MAX,
 };
 
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index 5b5f135..0db0962 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -40,8 +40,13 @@ 
 	EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM)			\
 	EM(SKB_DROP_REASON_SKB_COPY_DATA, SKB_COPY_DATA)	\
 	EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG)		\
+	EM(SKB_DROP_REASON_SKB_PULL, SKB_PULL)			\
+	EM(SKB_DROP_REASON_SKB_TRIM, SKB_TRIM)			\
 	EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR)			\
+	EM(SKB_DROP_REASON_DEV_READY, DEV_READY)		\
+	EM(SKB_DROP_REASON_DEV_FILTER, DEV_FILTER)		\
 	EM(SKB_DROP_REASON_FULL_RING, FULL_RING)		\
+	EM(SKB_DROP_REASON_BPF_FILTER, BPF_FILTER)		\
 	EMe(SKB_DROP_REASON_MAX, MAX)
 
 #undef EM