diff mbox series

[bpf-next,v4,08/15] veth: Support RX XDP metadata

Message ID 20221213023605.737383-9-sdf@google.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series xdp: hints via kfuncs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for set-matrix
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: edumazet@google.com davem@davemloft.net pabeni@redhat.com hawk@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 60 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix

Commit Message

Stanislav Fomichev Dec. 13, 2022, 2:35 a.m. UTC
The goal is to enable end-to-end testing of the metadata for AF_XDP.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
 drivers/net/veth.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Jesper Dangaard Brouer Dec. 13, 2022, 3:55 p.m. UTC | #1
On 13/12/2022 03.35, Stanislav Fomichev wrote:
> The goal is to enable end-to-end testing of the metadata for AF_XDP.
> 
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: David Ahern <dsahern@gmail.com>
> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Willem de Bruijn <willemb@google.com>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
> Cc: Maryam Tahhan <mtahhan@redhat.com>
> Cc: xdp-hints@xdp-project.net
> Cc: netdev@vger.kernel.org
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
>   drivers/net/veth.c | 24 ++++++++++++++++++++++++
>   1 file changed, 24 insertions(+)
> 
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 04ffd8cb2945..d5491e7a2798 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -118,6 +118,7 @@ static struct {
>   
>   struct veth_xdp_buff {
>   	struct xdp_buff xdp;
> +	struct sk_buff *skb;
>   };
>   
>   static int veth_get_link_ksettings(struct net_device *dev,
> @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
>   
>   		xdp_convert_frame_to_buff(frame, xdp);
>   		xdp->rxq = &rq->xdp_rxq;
> +		vxbuf.skb = NULL;
>   
>   		act = bpf_prog_run_xdp(xdp_prog, xdp);
>   
> @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
>   	__skb_push(skb, skb->data - skb_mac_header(skb));
>   	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
>   		goto drop;
> +	vxbuf.skb = skb;
>   
>   	orig_data = xdp->data;
>   	orig_data_end = xdp->data_end;
> @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>   	}
>   }
>   
> +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> +{
> +	*timestamp = ktime_get_mono_fast_ns();

This should be reading the hardware timestamp in the SKB.

Details: This hardware timestamp in the SKB is located in
skb_shared_info area, which is also available for xdp_frame (currently
used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
functionality, it would be natural to store the HW TS in the same place.
Making the veth skb/xdp_frame code paths able to share code.

> +	return 0;
> +}
> +
> +static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
> +{
> +	struct veth_xdp_buff *_ctx = (void *)ctx;
> +
> +	if (_ctx->skb)
> +		*hash = skb_get_hash(_ctx->skb);
> +	return 0;
> +}
> +
>   static const struct net_device_ops veth_netdev_ops = {
>   	.ndo_init            = veth_dev_init,
>   	.ndo_open            = veth_open,
> @@ -1622,6 +1640,11 @@ static const struct net_device_ops veth_netdev_ops = {
>   	.ndo_get_peer_dev	= veth_peer_dev,
>   };
>   
> +static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
> +	.xmo_rx_timestamp		= veth_xdp_rx_timestamp,
> +	.xmo_rx_hash			= veth_xdp_rx_hash,
> +};
> +
>   #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
>   		       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
>   		       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
> @@ -1638,6 +1661,7 @@ static void veth_setup(struct net_device *dev)
>   	dev->priv_flags |= IFF_PHONY_HEADROOM;
>   
>   	dev->netdev_ops = &veth_netdev_ops;
> +	dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
>   	dev->ethtool_ops = &veth_ethtool_ops;
>   	dev->features |= NETIF_F_LLTX;
>   	dev->features |= VETH_FEATURES;
Stanislav Fomichev Dec. 13, 2022, 8:42 p.m. UTC | #2
On Tue, Dec 13, 2022 at 7:55 AM Jesper Dangaard Brouer
<jbrouer@redhat.com> wrote:
>
>
> On 13/12/2022 03.35, Stanislav Fomichev wrote:
> > The goal is to enable end-to-end testing of the metadata for AF_XDP.
> >
> > Cc: John Fastabend <john.fastabend@gmail.com>
> > Cc: David Ahern <dsahern@gmail.com>
> > Cc: Martin KaFai Lau <martin.lau@linux.dev>
> > Cc: Jakub Kicinski <kuba@kernel.org>
> > Cc: Willem de Bruijn <willemb@google.com>
> > Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> > Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> > Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
> > Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
> > Cc: Maryam Tahhan <mtahhan@redhat.com>
> > Cc: xdp-hints@xdp-project.net
> > Cc: netdev@vger.kernel.org
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> >   drivers/net/veth.c | 24 ++++++++++++++++++++++++
> >   1 file changed, 24 insertions(+)
> >
> > diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> > index 04ffd8cb2945..d5491e7a2798 100644
> > --- a/drivers/net/veth.c
> > +++ b/drivers/net/veth.c
> > @@ -118,6 +118,7 @@ static struct {
> >
> >   struct veth_xdp_buff {
> >       struct xdp_buff xdp;
> > +     struct sk_buff *skb;
> >   };
> >
> >   static int veth_get_link_ksettings(struct net_device *dev,
> > @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
> >
> >               xdp_convert_frame_to_buff(frame, xdp);
> >               xdp->rxq = &rq->xdp_rxq;
> > +             vxbuf.skb = NULL;
> >
> >               act = bpf_prog_run_xdp(xdp_prog, xdp);
> >
> > @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
> >       __skb_push(skb, skb->data - skb_mac_header(skb));
> >       if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
> >               goto drop;
> > +     vxbuf.skb = skb;
> >
> >       orig_data = xdp->data;
> >       orig_data_end = xdp->data_end;
> > @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> >       }
> >   }
> >
> > +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> > +{
> > +     *timestamp = ktime_get_mono_fast_ns();
>
> This should be reading the hardware timestamp in the SKB.
>
> Details: This hardware timestamp in the SKB is located in
> skb_shared_info area, which is also available for xdp_frame (currently
> used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
> functionality, it would be natural to store the HW TS in the same place.
> Making the veth skb/xdp_frame code paths able to share code.

Does something like the following look acceptable as well?

*timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
if (!*timestamp)
        *timestamp = ktime_get_mono_fast_ns(); /* sw fallback */

Because I'd like to be able to test this path in the selftests. As
long as I get some number from veth_xdp_rx_timestamp, I can test it.
No amount of SOF_TIMESTAMPING_{SOFTWARE,RX_SOFTWARE,RAW_HARDWARE}
triggers non-zero hwtstamp for xsk receive path. Any suggestions?


> > +     return 0;
> > +}
> > +
> > +static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
> > +{
> > +     struct veth_xdp_buff *_ctx = (void *)ctx;
> > +
> > +     if (_ctx->skb)
> > +             *hash = skb_get_hash(_ctx->skb);
> > +     return 0;
> > +}
> > +
> >   static const struct net_device_ops veth_netdev_ops = {
> >       .ndo_init            = veth_dev_init,
> >       .ndo_open            = veth_open,
> > @@ -1622,6 +1640,11 @@ static const struct net_device_ops veth_netdev_ops = {
> >       .ndo_get_peer_dev       = veth_peer_dev,
> >   };
> >
> > +static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
> > +     .xmo_rx_timestamp               = veth_xdp_rx_timestamp,
> > +     .xmo_rx_hash                    = veth_xdp_rx_hash,
> > +};
> > +
> >   #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
> >                      NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
> >                      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
> > @@ -1638,6 +1661,7 @@ static void veth_setup(struct net_device *dev)
> >       dev->priv_flags |= IFF_PHONY_HEADROOM;
> >
> >       dev->netdev_ops = &veth_netdev_ops;
> > +     dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
> >       dev->ethtool_ops = &veth_ethtool_ops;
> >       dev->features |= NETIF_F_LLTX;
> >       dev->features |= VETH_FEATURES;
>
Jesper Dangaard Brouer Dec. 14, 2022, 9:48 a.m. UTC | #3
On 13/12/2022 21.42, Stanislav Fomichev wrote:
> On Tue, Dec 13, 2022 at 7:55 AM Jesper Dangaard Brouer
> <jbrouer@redhat.com> wrote:
>>
>>
>> On 13/12/2022 03.35, Stanislav Fomichev wrote:
>>> The goal is to enable end-to-end testing of the metadata for AF_XDP.
>>>
>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>> Cc: David Ahern <dsahern@gmail.com>
>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
>>> Cc: Jakub Kicinski <kuba@kernel.org>
>>> Cc: Willem de Bruijn <willemb@google.com>
>>> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
>>> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
>>> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
>>> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
>>> Cc: Maryam Tahhan <mtahhan@redhat.com>
>>> Cc: xdp-hints@xdp-project.net
>>> Cc: netdev@vger.kernel.org
>>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
>>> ---
>>>    drivers/net/veth.c | 24 ++++++++++++++++++++++++
>>>    1 file changed, 24 insertions(+)
>>>
>>> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
>>> index 04ffd8cb2945..d5491e7a2798 100644
>>> --- a/drivers/net/veth.c
>>> +++ b/drivers/net/veth.c
>>> @@ -118,6 +118,7 @@ static struct {
>>>
>>>    struct veth_xdp_buff {
>>>        struct xdp_buff xdp;
>>> +     struct sk_buff *skb;
>>>    };
>>>
>>>    static int veth_get_link_ksettings(struct net_device *dev,
>>> @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
>>>
>>>                xdp_convert_frame_to_buff(frame, xdp);
>>>                xdp->rxq = &rq->xdp_rxq;
>>> +             vxbuf.skb = NULL;
>>>
>>>                act = bpf_prog_run_xdp(xdp_prog, xdp);
>>>
>>> @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
>>>        __skb_push(skb, skb->data - skb_mac_header(skb));
>>>        if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
>>>                goto drop;
>>> +     vxbuf.skb = skb;
>>>
>>>        orig_data = xdp->data;
>>>        orig_data_end = xdp->data_end;
>>> @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>>>        }
>>>    }
>>>
>>> +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
>>> +{
>>> +     *timestamp = ktime_get_mono_fast_ns();
>>
>> This should be reading the hardware timestamp in the SKB.
>>
>> Details: This hardware timestamp in the SKB is located in
>> skb_shared_info area, which is also available for xdp_frame (currently
>> used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
>> functionality, it would be natural to store the HW TS in the same place.
>> Making the veth skb/xdp_frame code paths able to share code.
> 
> Does something like the following look acceptable as well?
> 
> *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
> if (!*timestamp)
>          *timestamp = ktime_get_mono_fast_ns(); /* sw fallback */
> 

How can the BPF programmer tell the difference between getting hardware 
or software timestamp?

This will get really confusing when someone implements a tcpdump feature
(like/extend xdpdump) and some packets (e.g. PTP packets) have HW
timestamps and some don't.  The time sequence in the pcap will be strange.

> Because I'd like to be able to test this path in the selftests. As
> long as I get some number from veth_xdp_rx_timestamp, I can test it.
> No amount of SOF_TIMESTAMPING_{SOFTWARE,RX_SOFTWARE,RAW_HARDWARE}
> triggers non-zero hwtstamp for xsk receive path. Any suggestions?
> 

You could implement the "store" operation I mentioned before.
For testing you can store an arbitrary value in the timestamp and check
it later by reading it back.

I can see you have changed the API to send down a pointer. Thus, a
simple flag could implement the writing the provided timestamp.

Regarding flags for reading the timestamp.  Should we be able to specify
what clock type we are asking for?
Have you notice that tcpdump can ask for different types of
timestamps[1]. e.g. for hardware timestamps it is either
'adapter_unsynced' or 'adaptor'. (See semantic in [1])

  # tcpdump -ni eth1 -j adapter_unsynced --time-stamp-precision=nano

[1] https://www.tcpdump.org/manpages/pcap-tstamp.7.txt


Hint list which your NIC 'adaptor' supports via this cmdline:

  # tcpdump -i mlx5p2 --list-time-stamp-types

--Jesper
Toke Høiland-Jørgensen Dec. 14, 2022, 10:47 a.m. UTC | #4
Jesper Dangaard Brouer <jbrouer@redhat.com> writes:

> On 13/12/2022 21.42, Stanislav Fomichev wrote:
>> On Tue, Dec 13, 2022 at 7:55 AM Jesper Dangaard Brouer
>> <jbrouer@redhat.com> wrote:
>>>
>>>
>>> On 13/12/2022 03.35, Stanislav Fomichev wrote:
>>>> The goal is to enable end-to-end testing of the metadata for AF_XDP.
>>>>
>>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>>> Cc: David Ahern <dsahern@gmail.com>
>>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
>>>> Cc: Jakub Kicinski <kuba@kernel.org>
>>>> Cc: Willem de Bruijn <willemb@google.com>
>>>> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
>>>> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
>>>> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
>>>> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
>>>> Cc: Maryam Tahhan <mtahhan@redhat.com>
>>>> Cc: xdp-hints@xdp-project.net
>>>> Cc: netdev@vger.kernel.org
>>>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
>>>> ---
>>>>    drivers/net/veth.c | 24 ++++++++++++++++++++++++
>>>>    1 file changed, 24 insertions(+)
>>>>
>>>> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
>>>> index 04ffd8cb2945..d5491e7a2798 100644
>>>> --- a/drivers/net/veth.c
>>>> +++ b/drivers/net/veth.c
>>>> @@ -118,6 +118,7 @@ static struct {
>>>>
>>>>    struct veth_xdp_buff {
>>>>        struct xdp_buff xdp;
>>>> +     struct sk_buff *skb;
>>>>    };
>>>>
>>>>    static int veth_get_link_ksettings(struct net_device *dev,
>>>> @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
>>>>
>>>>                xdp_convert_frame_to_buff(frame, xdp);
>>>>                xdp->rxq = &rq->xdp_rxq;
>>>> +             vxbuf.skb = NULL;
>>>>
>>>>                act = bpf_prog_run_xdp(xdp_prog, xdp);
>>>>
>>>> @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
>>>>        __skb_push(skb, skb->data - skb_mac_header(skb));
>>>>        if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
>>>>                goto drop;
>>>> +     vxbuf.skb = skb;
>>>>
>>>>        orig_data = xdp->data;
>>>>        orig_data_end = xdp->data_end;
>>>> @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>>>>        }
>>>>    }
>>>>
>>>> +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
>>>> +{
>>>> +     *timestamp = ktime_get_mono_fast_ns();
>>>
>>> This should be reading the hardware timestamp in the SKB.
>>>
>>> Details: This hardware timestamp in the SKB is located in
>>> skb_shared_info area, which is also available for xdp_frame (currently
>>> used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
>>> functionality, it would be natural to store the HW TS in the same place.
>>> Making the veth skb/xdp_frame code paths able to share code.
>> 
>> Does something like the following look acceptable as well?
>> 
>> *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
>> if (!*timestamp)
>>          *timestamp = ktime_get_mono_fast_ns(); /* sw fallback */
>> 
>
> How can the BPF programmer tell the difference between getting hardware 
> or software timestamp?
>
> This will get really confusing when someone implements a tcpdump feature
> (like/extend xdpdump) and some packets (e.g. PTP packets) have HW
> timestamps and some don't.  The time sequence in the pcap will be strange.
>
>> Because I'd like to be able to test this path in the selftests. As
>> long as I get some number from veth_xdp_rx_timestamp, I can test it.
>> No amount of SOF_TIMESTAMPING_{SOFTWARE,RX_SOFTWARE,RAW_HARDWARE}
>> triggers non-zero hwtstamp for xsk receive path. Any suggestions?
>> 
>
> You could implement the "store" operation I mentioned before.
> For testing you can store an arbitrary value in the timestamp and check
> it later by reading it back.
>
> I can see you have changed the API to send down a pointer. Thus, a
> simple flag could implement the writing the provided timestamp.
>
> Regarding flags for reading the timestamp.  Should we be able to specify
> what clock type we are asking for?
> Have you notice that tcpdump can ask for different types of
> timestamps[1]. e.g. for hardware timestamps it is either
> 'adapter_unsynced' or 'adaptor'. (See semantic in [1])
>
>   # tcpdump -ni eth1 -j adapter_unsynced --time-stamp-precision=nano

I don't think it makes sense for XDP to *ask* for a specific timestamp
(any individual packet probably only has one, right?). But we could add
a way to query which type of timestamp is available, either as a second
argument to the same function, or as a separate one. Same thing for the
hash, BTW (skb_set_hash() also takes a type argument).

I guess the easiest would be to just add a second parameter to both
getter functions for the type, but maybe there's a performance argument
for having it be a separate kfunc (at least for timestamp)?

-Toke
Martin KaFai Lau Dec. 14, 2022, 6:09 p.m. UTC | #5
On 12/14/22 2:47 AM, Toke Høiland-Jørgensen wrote:
> Jesper Dangaard Brouer <jbrouer@redhat.com> writes:
> 
>> On 13/12/2022 21.42, Stanislav Fomichev wrote:
>>> On Tue, Dec 13, 2022 at 7:55 AM Jesper Dangaard Brouer
>>> <jbrouer@redhat.com> wrote:
>>>>
>>>>
>>>> On 13/12/2022 03.35, Stanislav Fomichev wrote:
>>>>> The goal is to enable end-to-end testing of the metadata for AF_XDP.
>>>>>
>>>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>>>> Cc: David Ahern <dsahern@gmail.com>
>>>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
>>>>> Cc: Jakub Kicinski <kuba@kernel.org>
>>>>> Cc: Willem de Bruijn <willemb@google.com>
>>>>> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
>>>>> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
>>>>> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
>>>>> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
>>>>> Cc: Maryam Tahhan <mtahhan@redhat.com>
>>>>> Cc: xdp-hints@xdp-project.net
>>>>> Cc: netdev@vger.kernel.org
>>>>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
>>>>> ---
>>>>>     drivers/net/veth.c | 24 ++++++++++++++++++++++++
>>>>>     1 file changed, 24 insertions(+)
>>>>>
>>>>> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
>>>>> index 04ffd8cb2945..d5491e7a2798 100644
>>>>> --- a/drivers/net/veth.c
>>>>> +++ b/drivers/net/veth.c
>>>>> @@ -118,6 +118,7 @@ static struct {
>>>>>
>>>>>     struct veth_xdp_buff {
>>>>>         struct xdp_buff xdp;
>>>>> +     struct sk_buff *skb;
>>>>>     };
>>>>>
>>>>>     static int veth_get_link_ksettings(struct net_device *dev,
>>>>> @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
>>>>>
>>>>>                 xdp_convert_frame_to_buff(frame, xdp);
>>>>>                 xdp->rxq = &rq->xdp_rxq;
>>>>> +             vxbuf.skb = NULL;
>>>>>
>>>>>                 act = bpf_prog_run_xdp(xdp_prog, xdp);
>>>>>
>>>>> @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
>>>>>         __skb_push(skb, skb->data - skb_mac_header(skb));
>>>>>         if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
>>>>>                 goto drop;
>>>>> +     vxbuf.skb = skb;
>>>>>
>>>>>         orig_data = xdp->data;
>>>>>         orig_data_end = xdp->data_end;
>>>>> @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>>>>>         }
>>>>>     }
>>>>>
>>>>> +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
>>>>> +{
>>>>> +     *timestamp = ktime_get_mono_fast_ns();
>>>>
>>>> This should be reading the hardware timestamp in the SKB.
>>>>
>>>> Details: This hardware timestamp in the SKB is located in
>>>> skb_shared_info area, which is also available for xdp_frame (currently
>>>> used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
>>>> functionality, it would be natural to store the HW TS in the same place.
>>>> Making the veth skb/xdp_frame code paths able to share code.
>>>
>>> Does something like the following look acceptable as well?
>>>
>>> *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;

If it is to test the kfunc and ensure veth_xdp_rx_timestamp is called, this 
alone should be enough. skb_hwtstamps(_ctx->skb)->hwtstamp should be 0 if 
hwtstamp is unavailable?  The test can initialize the 'u64 *timestamp' arg to 
non-zero first.  If it is not good enough, an fentry tracing can be done to 
veth_xdp_rx_timestamp to ensure it is called also.  There is also fmod_ret that 
could change the return value but the timestamp is not the return value though.

If the above is not enough, another direction of thought could be doing it 
through bpf_prog_test_run_xdp() but it will need a way to initialize the 
veth_xdp_buff.

That said, overall, I don't think it is a good idea to bend the 
veth_xdp_rx_timestamp kfunc too much only for testing purpose unless there is no 
other way out.
Stanislav Fomichev Dec. 14, 2022, 6:44 p.m. UTC | #6
On Wed, Dec 14, 2022 at 10:09 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 12/14/22 2:47 AM, Toke Høiland-Jørgensen wrote:
> > Jesper Dangaard Brouer <jbrouer@redhat.com> writes:
> >
> >> On 13/12/2022 21.42, Stanislav Fomichev wrote:
> >>> On Tue, Dec 13, 2022 at 7:55 AM Jesper Dangaard Brouer
> >>> <jbrouer@redhat.com> wrote:
> >>>>
> >>>>
> >>>> On 13/12/2022 03.35, Stanislav Fomichev wrote:
> >>>>> The goal is to enable end-to-end testing of the metadata for AF_XDP.
> >>>>>
> >>>>> Cc: John Fastabend <john.fastabend@gmail.com>
> >>>>> Cc: David Ahern <dsahern@gmail.com>
> >>>>> Cc: Martin KaFai Lau <martin.lau@linux.dev>
> >>>>> Cc: Jakub Kicinski <kuba@kernel.org>
> >>>>> Cc: Willem de Bruijn <willemb@google.com>
> >>>>> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> >>>>> Cc: Anatoly Burakov <anatoly.burakov@intel.com>
> >>>>> Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
> >>>>> Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
> >>>>> Cc: Maryam Tahhan <mtahhan@redhat.com>
> >>>>> Cc: xdp-hints@xdp-project.net
> >>>>> Cc: netdev@vger.kernel.org
> >>>>> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> >>>>> ---
> >>>>>     drivers/net/veth.c | 24 ++++++++++++++++++++++++
> >>>>>     1 file changed, 24 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> >>>>> index 04ffd8cb2945..d5491e7a2798 100644
> >>>>> --- a/drivers/net/veth.c
> >>>>> +++ b/drivers/net/veth.c
> >>>>> @@ -118,6 +118,7 @@ static struct {
> >>>>>
> >>>>>     struct veth_xdp_buff {
> >>>>>         struct xdp_buff xdp;
> >>>>> +     struct sk_buff *skb;
> >>>>>     };
> >>>>>
> >>>>>     static int veth_get_link_ksettings(struct net_device *dev,
> >>>>> @@ -602,6 +603,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
> >>>>>
> >>>>>                 xdp_convert_frame_to_buff(frame, xdp);
> >>>>>                 xdp->rxq = &rq->xdp_rxq;
> >>>>> +             vxbuf.skb = NULL;
> >>>>>
> >>>>>                 act = bpf_prog_run_xdp(xdp_prog, xdp);
> >>>>>
> >>>>> @@ -823,6 +825,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
> >>>>>         __skb_push(skb, skb->data - skb_mac_header(skb));
> >>>>>         if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
> >>>>>                 goto drop;
> >>>>> +     vxbuf.skb = skb;
> >>>>>
> >>>>>         orig_data = xdp->data;
> >>>>>         orig_data_end = xdp->data_end;
> >>>>> @@ -1601,6 +1604,21 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> >>>>>         }
> >>>>>     }
> >>>>>
> >>>>> +static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
> >>>>> +{
> >>>>> +     *timestamp = ktime_get_mono_fast_ns();
> >>>>
> >>>> This should be reading the hardware timestamp in the SKB.
> >>>>
> >>>> Details: This hardware timestamp in the SKB is located in
> >>>> skb_shared_info area, which is also available for xdp_frame (currently
> >>>> used for multi-buffer purposes).  Thus, when adding xdp-hints "store"
> >>>> functionality, it would be natural to store the HW TS in the same place.
> >>>> Making the veth skb/xdp_frame code paths able to share code.
> >>>
> >>> Does something like the following look acceptable as well?
> >>>
> >>> *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
>
> If it is to test the kfunc and ensure veth_xdp_rx_timestamp is called, this
> alone should be enough. skb_hwtstamps(_ctx->skb)->hwtstamp should be 0 if
> hwtstamp is unavailable?  The test can initialize the 'u64 *timestamp' arg to
> non-zero first.  If it is not good enough, an fentry tracing can be done to
> veth_xdp_rx_timestamp to ensure it is called also.  There is also fmod_ret that
> could change the return value but the timestamp is not the return value though.
>
> If the above is not enough, another direction of thought could be doing it
> through bpf_prog_test_run_xdp() but it will need a way to initialize the
> veth_xdp_buff.
>
> That said, overall, I don't think it is a good idea to bend the
> veth_xdp_rx_timestamp kfunc too much only for testing purpose unless there is no
> other way out.

Oh, good point about just making sure veth_xdp_rx_timestamp returns
timestamp=0. That should be enough, thanks!
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 04ffd8cb2945..d5491e7a2798 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -118,6 +118,7 @@  static struct {
 
 struct veth_xdp_buff {
 	struct xdp_buff xdp;
+	struct sk_buff *skb;
 };
 
 static int veth_get_link_ksettings(struct net_device *dev,
@@ -602,6 +603,7 @@  static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
 
 		xdp_convert_frame_to_buff(frame, xdp);
 		xdp->rxq = &rq->xdp_rxq;
+		vxbuf.skb = NULL;
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
 
@@ -823,6 +825,7 @@  static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	__skb_push(skb, skb->data - skb_mac_header(skb));
 	if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
 		goto drop;
+	vxbuf.skb = skb;
 
 	orig_data = xdp->data;
 	orig_data_end = xdp->data_end;
@@ -1601,6 +1604,21 @@  static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
+static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+	*timestamp = ktime_get_mono_fast_ns();
+	return 0;
+}
+
+static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
+{
+	struct veth_xdp_buff *_ctx = (void *)ctx;
+
+	if (_ctx->skb)
+		*hash = skb_get_hash(_ctx->skb);
+	return 0;
+}
+
 static const struct net_device_ops veth_netdev_ops = {
 	.ndo_init            = veth_dev_init,
 	.ndo_open            = veth_open,
@@ -1622,6 +1640,11 @@  static const struct net_device_ops veth_netdev_ops = {
 	.ndo_get_peer_dev	= veth_peer_dev,
 };
 
+static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
+	.xmo_rx_timestamp		= veth_xdp_rx_timestamp,
+	.xmo_rx_hash			= veth_xdp_rx_hash,
+};
+
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
 		       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
 		       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
@@ -1638,6 +1661,7 @@  static void veth_setup(struct net_device *dev)
 	dev->priv_flags |= IFF_PHONY_HEADROOM;
 
 	dev->netdev_ops = &veth_netdev_ops;
+	dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
 	dev->ethtool_ops = &veth_ethtool_ops;
 	dev->features |= NETIF_F_LLTX;
 	dev->features |= VETH_FEATURES;