[bpf-next,v6,3/3] selftests/bpf: Add selftest for XDP_REDIRECT in bpf_prog_run()

Message ID 20220106195938.261184-4-toke@redhat.com (mailing list archive)
State Superseded
Delegated to: BPF
Series: Add support for transmitting packets using XDP in bpf_prog_run()

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success
netdev/cover_letter success Series has a cover letter
netdev/patch_count success
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 1 maintainers not CCed: linux-kselftest@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: Macros with flow control statements should be avoided
                          WARNING: Use of volatile is usually wrong: see Documentation/process/volatile-considered-harmful.rst
                          WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
                          WARNING: line length of 83 exceeds 80 columns
                          WARNING: line length of 86 exceeds 80 columns
                          WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next success VM_Test

Commit Message

Toke Høiland-Jørgensen Jan. 6, 2022, 7:59 p.m. UTC
This adds a selftest for the XDP_REDIRECT facility in bpf_prog_run(): it
redirects packets into a veth and counts them using an XDP program on the
other side of the veth pair and a TC program on the local side of the veth.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 .../bpf/prog_tests/xdp_do_redirect.c          | 151 ++++++++++++++++++
 .../bpf/progs/test_xdp_do_redirect.c          |  78 +++++++++
 2 files changed, 229 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c

Comments

Alexei Starovoitov Jan. 7, 2022, 12:50 a.m. UTC | #1
On Thu, Jan 6, 2022 at 11:59 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
> +
> +#define NUM_PKTS 10

I'm afraid this needs more work.
Just bumping above to 1M I got:
[  254.165911] ================================
[  254.166387] WARNING: inconsistent lock state
[  254.166882] 5.16.0-rc7-02011-g64923127f1b3 #3784 Tainted: G           O
[  254.167659] --------------------------------
[  254.168140] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[  254.168793] swapper/7/0 [HC0[0]:SC1[5]:HE1:SE0] takes:
[  254.169373] ffff888113d24220 (&r->producer_lock){+.?.}-{2:2}, at: veth_xmit+0x361/0x830
[  254.170317] {SOFTIRQ-ON-W} state was registered at:
[  254.170921]   lock_acquire+0x18a/0x450
[  254.171371]   _raw_spin_lock+0x2f/0x40
[  254.171815]   veth_xdp_xmit+0x1d7/0x8c0
[  254.172241]   veth_ndo_xdp_xmit+0x1d/0x50
[  254.172689]   bq_xmit_all+0x562/0xc30
[  254.173159]   __dev_flush+0xb1/0x220
[  254.173586]   xdp_do_flush+0xa/0x20
[  254.173983]   xdp_test_run_batch.constprop.25+0x90c/0xf00
[  254.174564]   bpf_test_run_xdp_live+0x369/0x480
[  254.175038]   bpf_prog_test_run_xdp+0x63f/0xe50
[  254.175512]   __sys_bpf+0x688/0x4410
[  254.175923]   __x64_sys_bpf+0x75/0xb0
[  254.176327]   do_syscall_64+0x34/0x80
[  254.176733]   entry_SYSCALL_64_after_hwframe+0x44/0xae
[  254.177297] irq event stamp: 130862
[  254.177681] hardirqs last  enabled at (130862): [<ffffffff812d0812>] call_rcu+0x2a2/0x640
[  254.178561] hardirqs last disabled at (130861): [<ffffffff812d08bd>] call_rcu+0x34d/0x640
[  254.179404] softirqs last  enabled at (130814): [<ffffffff83c00534>] __do_softirq+0x534/0x835
[  254.180332] softirqs last disabled at (130839): [<ffffffff811389f7>] irq_exit_rcu+0xe7/0x120
[  254.181255]
[  254.181255] other info that might help us debug this:
[  254.181969]  Possible unsafe locking scenario:
[  254.183172]   lock(&r->producer_lock);
[  254.183590]   <Interrupt>
[  254.183893]     lock(&r->producer_lock);
[  254.184321]
[  254.184321]  *** DEADLOCK ***
[  254.184321]
[  254.185047] 5 locks held by swapper/7/0:
[  254.185501]  #0: ffff8881f6d89db8 ((&ndev->rs_timer)){+.-.}-{0:0}, at: call_timer_fn+0xc8/0x440
[  254.186496]  #1: ffffffff854415e0 (rcu_read_lock){....}-{1:2}, at: ndisc_send_skb+0x761/0x12e0
[  254.187444]  #2: ffffffff85441580 (rcu_read_lock_bh){....}-{1:2}, at: ip6_finish_output2+0x2da/0x1e00
[  254.188447]  #3: ffffffff85441580 (rcu_read_lock_bh){....}-{1:2}, at: __dev_queue_xmit+0x1de/0x2910
[  254.189502]  #4: ffffffff854415e0 (rcu_read_lock){....}-{1:2}, at: veth_xmit+0x41/0x830
[  254.190455]
[  254.190455] stack backtrace:
[  254.190963] CPU: 7 PID: 0 Comm: swapper/7 Tainted: G           O 5.16.0-rc7-02011-g64923127f1b3 #3784
[  254.192109] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[  254.193427] Call Trace:
[  254.193711]  <IRQ>
[  254.193945]  dump_stack_lvl+0x44/0x57
[  254.194418]  mark_lock.part.54+0x157b/0x2210
[  254.194940]  ? mark_lock.part.54+0xfd/0x2210
[  254.195451]  ? print_usage_bug+0x80/0x80
[  254.195896]  ? rcu_read_lock_sched_held+0x91/0xc0
[  254.196413]  ? rcu_read_lock_bh_held+0xa0/0xa0
[  254.196903]  ? rcu_read_lock_bh_held+0xa0/0xa0
[  254.197389]  ? find_held_lock+0x33/0x1c0
[  254.197826]  ? lock_release+0x3a1/0x650
[  254.198251]  ? __stack_depot_save+0x274/0x490
[  254.198742]  ? lock_acquire+0x19a/0x450
[  254.199175]  ? lock_downgrade+0x690/0x690
[  254.199626]  ? do_raw_spin_lock+0x11d/0x270
[  254.200091]  ? rwlock_bug.part.2+0x90/0x90
[  254.200550]  __lock_acquire+0x151f/0x6310
[  254.201000]  ? mark_lock.part.54+0xfd/0x2210
[  254.201470]  ? lockdep_hardirqs_on_prepare+0x3f0/0x3f0
[  254.202083]  ? lock_is_held_type+0xda/0x130
[  254.202592]  ? rcu_read_lock_sched_held+0x91/0xc0
[  254.203134]  ? rcu_read_lock_bh_held+0xa0/0xa0
[  254.203630]  lock_acquire+0x18a/0x450
[  254.204041]  ? veth_xmit+0x361/0x830
[  254.204455]  ? lock_release+0x650/0x650
[  254.204932]  ? eth_gro_receive+0xc60/0xc60
[  254.205421]  ? rcu_read_lock_held+0x91/0xa0
[  254.205912]  _raw_spin_lock+0x2f/0x40
[  254.206314]  ? veth_xmit+0x361/0x830
[  254.206707]  veth_xmit+0x361/0x830

I suspect it points out that local_bh_disable is needed
around xdp_do_flush.

That's why I asked you to test it with something
more than 3 in NUM_PKTS.
What values did you test it with? I hope not just 10.

Please make sure XDP_PASS/TX/REDIRECT are all stress tested.
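
(Concretely, "bumping above to 1M" means changing the define in the
selftest below, which feeds the .repeat field of bpf_test_run_opts; a
stress variant for illustration, not part of the patch:)

	#define NUM_PKTS 1000000	/* patch ships with 10 */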
Toke Høiland-Jørgensen Jan. 7, 2022, 3:54 p.m. UTC | #2
Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:

> On Thu, Jan 6, 2022 at 11:59 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>> +
>> +#define NUM_PKTS 10
>
> I'm afraid this needs more work.
> Just bumping above to 1M I got:
> [... lockdep splat snipped; identical to the one quoted in full in #1 above ...]
>
> I suspect it points out that local_bh_disable is needed
> around xdp_do_flush.
>
> That's why I asked you to test it with something
> more than 3 in NUM_PKTS.
> What values did you test it with? I hope not just 10.
>
> Please make sure XDP_PASS/TX/REDIRECT are all stress tested.

Okay, finally managed to reproduce this; it did not show up at all in my
own tests.

Did you run the old version of the selftest by any chance? At least I
can only reproduce it with the forwarding sysctl enabled; it happens
because the XDP_PASS path races with the XDP_REDIRECT path and end up
trying to grab the same lock, which only happens when the XDP_PASS path
sends the packets back out the same interface. The fix is to extend the
local_bh_disable() to cover the full loop in xdp_test_run_batch().
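
A sketch of that shape of fix (not the actual patch; only
xdp_test_run_batch() and xdp_do_flush() are names from the trace above,
the signature and loop body are illustrative):

	static int xdp_test_run_batch(struct xdp_test_data *xdp,
				      struct bpf_prog *prog, u32 repeat)
	{
		int i, err = 0;

		/* Keep bottom halves disabled across the whole batch, not
		 * just the individual program runs, so veth's producer_lock
		 * is never taken from both process and softirq context (the
		 * inconsistent lock state lockdep flagged above).
		 */
		local_bh_disable();
		for (i = 0; i < repeat; i++) {
			/* run prog; queue up XDP_PASS/TX/REDIRECT frames */
		}
		xdp_do_flush();		/* now inside the bh-disabled section */
		local_bh_enable();

		return err;
	}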

I'll send an update with that fixed. But I'm not sure what to do about
the selftest? I can keep the forwarding enabled + 1 million iterations -
that seems to trigger the bug fairly reliably for me, but it takes a bit
longer to run. Is that acceptable?

-Toke
Toke Høiland-Jørgensen Jan. 7, 2022, 4:05 p.m. UTC | #3
Toke Høiland-Jørgensen <toke@redhat.com> writes:

> Alexei Starovoitov <alexei.starovoitov@gmail.com> writes:
>
>> [... quoted patch context, lockdep splat and stress-test request snipped; see #1 above ...]
>
> Okay, finally managed to reproduce this; it did not show up at all in my
> own tests.
>
> Did you run the old version of the selftest by any chance? At least I
> can only reproduce it with the forwarding sysctl enabled; it happens
> because the XDP_PASS path races with the XDP_REDIRECT path and they end up
> trying to grab the same lock, which only happens when the XDP_PASS path
> sends the packets back out the same interface. The fix is to extend the
> local_bh_disable() to cover the full loop in xdp_test_run_batch().
>
> I'll send an update with that fixed. But I'm not sure what to do about
> the selftest? I can keep the forwarding enabled + 1 million iterations -
> that seems to trigger the bug fairly reliably for me, but it takes a bit
> longer to run. Is that acceptable?

The absolute difference is just over three seconds on my machine, BTW:

1M pkts:

[root@(none) bpf]# time ./test_progs -a xdp_do_redirect
#221 xdp_do_redirect:OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

real	0m5,042s
user	0m0,109s
sys	0m3,968s

10 pkts:

[root@(none) bpf]# time ./test_progs -a xdp_do_redirect
#221 xdp_do_redirect:OK
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

real	0m1,823s
user	0m0,117s
sys	0m0,685s


-Toke

Patch

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
new file mode 100644
index 000000000000..3789c380f24e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -0,0 +1,151 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include "test_xdp_do_redirect.skel.h"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto out;				\
+	})
+
+struct udp_packet {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct udphdr udp;
+	__u8 payload[64 - sizeof(struct udphdr)
+		     - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
+} __packed;
+
+static struct udp_packet pkt_udp = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+	.eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
+	.iph.version = 6,
+	.iph.nexthdr = IPPROTO_UDP,
+	.iph.payload_len = bpf_htons(sizeof(struct udp_packet)
+				     - offsetof(struct udp_packet, udp)),
+	.iph.hop_limit = 2,
+	.iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
+	.iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
+	.udp.source = bpf_htons(1),
+	.udp.dest = bpf_htons(1),
+	.udp.len = bpf_htons(sizeof(struct udp_packet)
+			     - offsetof(struct udp_packet, udp)),
+	.payload = {0x42}, /* receiver XDP program matches on this */
+};
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	ret = bpf_tc_attach(hook, &opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+		bpf_tc_hook_destroy(hook);
+		return ret;
+	}
+
+	return 0;
+}
+
+#define NUM_PKTS 10
+void test_xdp_do_redirect(void)
+{
+	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
+	char data[sizeof(pkt_udp) + sizeof(__u32)];
+	struct test_xdp_do_redirect *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	struct bpf_link *link;
+
+	struct xdp_md ctx_in = { .data = sizeof(__u32),
+				 .data_end = sizeof(data) };
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .data_in = &data,
+			    .data_size_in = sizeof(data),
+			    .ctx_in = &ctx_in,
+			    .ctx_size_in = sizeof(ctx_in),
+			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+			    .repeat = NUM_PKTS,
+		);
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
+	*((__u32 *)data) = 0x42; /* metadata test value */
+
+	skel = test_xdp_do_redirect__open();
+	if (!ASSERT_OK_PTR(skel, "skel"))
+		return;
+
+	/* The XDP program we run with bpf_prog_run() will cycle through all
+	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
+	 * ending up with PASS, so we should end up with two packets on the dst
+	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
+	 * payload.
+	 */
+	SYS("ip netns add testns");
+	nstoken = open_netns("testns");
+	if (!ASSERT_OK_PTR(nstoken, "setns"))
+		goto out;
+
+	SYS("ip link add veth_src type veth peer name veth_dst");
+	SYS("ip link set dev veth_src up");
+	SYS("ip link set dev veth_dst up");
+
+	ifindex_src = if_nametoindex("veth_src");
+	ifindex_dst = if_nametoindex("veth_dst");
+	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
+	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+		goto out;
+
+	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
+	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
+	skel->rodata->ifindex_in = ifindex_src;
+	ctx_in.ingress_ifindex = ifindex_src;
+	tc_hook.ifindex = ifindex_src;
+
+	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
+		goto out;
+
+	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
+	if (!ASSERT_OK_PTR(link, "prog_attach"))
+		goto out;
+	skel->links.xdp_count_pkts = link;
+
+	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
+	if (attach_tc_prog(&tc_hook, tc_prog_fd))
+		goto out;
+
+	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
+	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
+	if (!ASSERT_OK(err, "prog_run"))
+		goto out_tc;
+
+	/* wait for the packets to be flushed */
+	kern_sync_rcu();
+
+	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
+	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
+
+out_tc:
+	bpf_tc_hook_destroy(&tc_hook);
+out:
+	if (nstoken)
+		close_netns(nstoken);
+	system("ip netns del testns");
+	test_xdp_do_redirect__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
new file mode 100644
index 000000000000..cdb0ddb691c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -0,0 +1,78 @@ 
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define ETH_ALEN 6
+const volatile int ifindex_out;
+const volatile int ifindex_in;
+const volatile __u8 expect_dst[ETH_ALEN];
+volatile int pkts_seen_xdp = 0;
+volatile int pkts_seen_tc = 0;
+volatile int retcode = XDP_REDIRECT;
+
+SEC("xdp")
+int xdp_redirect(struct xdp_md *xdp)
+{
+	__u32 *metadata = (void *)(long)xdp->data_meta;
+	void *data = (void *)(long)xdp->data;
+	int ret = retcode;
+
+	if (xdp->ingress_ifindex != ifindex_in)
+		return XDP_ABORTED;
+
+	if (metadata + 1 > data)
+		return XDP_ABORTED;
+
+	if (*metadata != 0x42)
+		return XDP_ABORTED;
+
+	if (bpf_xdp_adjust_meta(xdp, 4))
+		return XDP_ABORTED;
+
+	if (retcode > XDP_PASS)
+		retcode--;
+
+	if (ret == XDP_REDIRECT)
+		return bpf_redirect(ifindex_out, 0);
+
+	return ret;
+}
+
+static bool check_pkt(void *data, void *data_end)
+{
+	struct ethhdr *eth = data;
+	struct ipv6hdr *iph = (void *)(eth + 1);
+	struct udphdr *udp = (void *)(iph + 1);
+	__u8 *payload = (void *)(udp + 1);
+
+	if (payload + 1 > data_end)
+		return false;
+
+	return iph->nexthdr == IPPROTO_UDP && *payload == 0x42;
+}
+
+SEC("xdp")
+int xdp_count_pkts(struct xdp_md *xdp)
+{
+	void *data = (void *)(long)xdp->data;
+	void *data_end = (void *)(long)xdp->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_xdp++;
+
+	return XDP_PASS;
+}
+
+SEC("tc")
+int tc_count_pkts(struct __sk_buff *skb)
+{
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+
+	if (check_pkt(data, data_end))
+		pkts_seen_tc++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";