diff mbox series

[RFCv2,bpf-next,04/18] net: create xdp_hints_common and set functions

Message ID 166256552083.1434226.577215984964402996.stgit@firesoul (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series XDP-hints: XDP gaining access to HW offload hints via BTF | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 pending Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
bpf/vmtest-bpf-next-VM_Test-2 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 4383 this patch: 4386
netdev/cc_maintainers warning 7 maintainers not CCed: edumazet@google.com john.fastabend@gmail.com pabeni@redhat.com daniel@iogearbox.net kuba@kernel.org hawk@kernel.org davem@davemloft.net
netdev/build_clang success Errors and warnings before: 1069 this patch: 1069
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4573 this patch: 4576
netdev/checkpatch warning CHECK: Lines should not end with a '(' CHECK: Please don't use multiple blank lines WARNING: Prefer __aligned(4) over __attribute__((aligned(4)))
netdev/kdoc fail Errors and warnings before: 0 this patch: 23
netdev/source_inline success Was 0 now: 0

Commit Message

Jesper Dangaard Brouer Sept. 7, 2022, 3:45 p.m. UTC
XDP-hints via BTF are about giving drivers the ability to extend the
common set of hardware offload hints in a flexible way.

This patch start out with defining the common set, based on what is
used available in the SKB. Having this as a common struct in core
vmlinux makes it easier to implement xdp_frame to SKB conversion
routines as normal C-code, see later patches.

Drivers can redefine the layout of the entire metadata area, but are
encouraged to use this common struct as the base, on which they can
extend on top for their extra hardware offload hints. When doing so,
drivers can mark the xdp_buff (and xdp_frame) with flags indicating
this it compatible with the common struct.

Patch also provides XDP-hints driver helper functions for updating the
common struct. Helpers gets inlined and are defined for maximum
performance, which does require some extra care in drivers, e.g. to
keep track of flags to reduce data dependencies, see code DOC.

Userspace and BPF-prog's MUST not consider the common struct UAPI.
The common struct (and enum flags) are only exposed via BTF, which
implies consumers must read and decode this BTF before using/consuming
data layout.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/net/xdp.h |  147 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/xdp.c    |    5 ++
 2 files changed, 152 insertions(+)

Comments

Burakov, Anatoly Sept. 9, 2022, 10:49 a.m. UTC | #1
On 07-Sep-22 4:45 PM, Jesper Dangaard Brouer wrote:
> XDP-hints via BTF are about giving drivers the ability to extend the
> common set of hardware offload hints in a flexible way.
> 
> This patch start out with defining the common set, based on what is
> used available in the SKB. Having this as a common struct in core
> vmlinux makes it easier to implement xdp_frame to SKB conversion
> routines as normal C-code, see later patches.
> 
> Drivers can redefine the layout of the entire metadata area, but are
> encouraged to use this common struct as the base, on which they can
> extend on top for their extra hardware offload hints. When doing so,
> drivers can mark the xdp_buff (and xdp_frame) with flags indicating
> this it compatible with the common struct.
> 
> Patch also provides XDP-hints driver helper functions for updating the
> common struct. Helpers gets inlined and are defined for maximum
> performance, which does require some extra care in drivers, e.g. to
> keep track of flags to reduce data dependencies, see code DOC.
> 
> Userspace and BPF-prog's MUST not consider the common struct UAPI.
> The common struct (and enum flags) are only exposed via BTF, which
> implies consumers must read and decode this BTF before using/consuming
> data layout.
> 
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> ---
>   include/net/xdp.h |  147 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   net/core/xdp.c    |    5 ++
>   2 files changed, 152 insertions(+)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 04c852c7a77f..ea5836ccee82 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -8,6 +8,151 @@
>   
>   #include <linux/skbuff.h> /* skb_shared_info */
>   
> +/**
> + * struct xdp_hints_common - Common XDP-hints offloads shared with netstack
> + * @btf_full_id: The modules BTF object + type ID for specific struct
> + * @vlan_tci: Hardware provided VLAN tag + proto type in @xdp_hints_flags
> + * @rx_hash32: Hardware provided RSS hash value
> + * @xdp_hints_flags: see &enum xdp_hints_flags
> + *
> + * This structure contains the most commonly used hardware offloads hints
> + * provided by NIC drivers and supported by the SKB.
> + *
> + * Driver are expected to extend this structure by include &struct
> + * xdp_hints_common as part of the drivers own specific xdp_hints struct's, but
> + * at the end-of their struct given XDP metadata area grows backwards.
> + *
> + * The member @btf_full_id is populated by driver modules to uniquely identify
> + * the BTF struct.  The high 32-bits store the modules BTF object ID and the
> + * lower 32-bit the BTF type ID within that BTF object.
> + */
> +struct xdp_hints_common {
> +	union {
> +		__wsum		csum;
> +		struct {
> +			__u16	csum_start;
> +			__u16	csum_offset;
> +		};
> +	};
> +	u16 rx_queue;
> +	u16 vlan_tci;
> +	u32 rx_hash32;
> +	u32 xdp_hints_flags;
> +	u64 btf_full_id; /* BTF object + type ID */
> +} __attribute__((aligned(4))) __attribute__((packed));

I'm assuming any Tx metadata will have to go before the Rx checksum union?

> +
> +
> +/**
> + * enum xdp_hints_flags - flags used by &struct xdp_hints_common
> + *
> + * The &enum xdp_hints_flags have reserved the first 16 bits for common flags
> + * and drivers can introduce use their own flags bits from BIT(16). For
> + * BPF-progs to find these flags (via BTF) drivers should define an enum
> + * xdp_hints_flags_driver.
> + */
> +enum xdp_hints_flags {
> +	HINT_FLAG_CSUM_TYPE_BIT0  = BIT(0),
> +	HINT_FLAG_CSUM_TYPE_BIT1  = BIT(1),
> +	HINT_FLAG_CSUM_TYPE_MASK  = 0x3,
> +
> +	HINT_FLAG_CSUM_LEVEL_BIT0 = BIT(2),
> +	HINT_FLAG_CSUM_LEVEL_BIT1 = BIT(3),
> +	HINT_FLAG_CSUM_LEVEL_MASK = 0xC,
> +	HINT_FLAG_CSUM_LEVEL_SHIFT = 2,
> +
> +	HINT_FLAG_RX_HASH_TYPE_BIT0 = BIT(4),
> +	HINT_FLAG_RX_HASH_TYPE_BIT1 = BIT(5),
> +	HINT_FLAG_RX_HASH_TYPE_MASK = 0x30,
> +	HINT_FLAG_RX_HASH_TYPE_SHIFT = 0x4,
> +
> +	HINT_FLAG_RX_QUEUE = BIT(7),
> +
> +	HINT_FLAG_VLAN_PRESENT            = BIT(8),
> +	HINT_FLAG_VLAN_PROTO_ETH_P_8021Q  = BIT(9),
> +	HINT_FLAG_VLAN_PROTO_ETH_P_8021AD = BIT(10),
> +	/* Flags from BIT(16) can be used by drivers */

If we assumed we also have Tx section, would 16 bits be enough? For a 
basic implementation of UDP checksumming, AF_XDP would need 3x16 more 
bits (to store L2/L3/L4 offsets) plus probably a flag field indicating 
presence of each. Is there any way to expand common fields in the future 
(or is it at all intended to be expandable)?
Jesper Dangaard Brouer Sept. 9, 2022, 2:13 p.m. UTC | #2
On 09/09/2022 12.49, Burakov, Anatoly wrote:
> On 07-Sep-22 4:45 PM, Jesper Dangaard Brouer wrote:
>> XDP-hints via BTF are about giving drivers the ability to extend the
>> common set of hardware offload hints in a flexible way.
>>
>> This patch start out with defining the common set, based on what is
>> used available in the SKB. Having this as a common struct in core
>> vmlinux makes it easier to implement xdp_frame to SKB conversion
>> routines as normal C-code, see later patches.
>>
>> Drivers can redefine the layout of the entire metadata area, but are
>> encouraged to use this common struct as the base, on which they can
>> extend on top for their extra hardware offload hints. When doing so,
>> drivers can mark the xdp_buff (and xdp_frame) with flags indicating
>> this it compatible with the common struct.
>>
>> Patch also provides XDP-hints driver helper functions for updating the
>> common struct. Helpers gets inlined and are defined for maximum
>> performance, which does require some extra care in drivers, e.g. to
>> keep track of flags to reduce data dependencies, see code DOC.
>>
>> Userspace and BPF-prog's MUST not consider the common struct UAPI.
>> The common struct (and enum flags) are only exposed via BTF, which
>> implies consumers must read and decode this BTF before using/consuming
>> data layout.
>>
>> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>> ---
>>   include/net/xdp.h |  147 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   net/core/xdp.c    |    5 ++
>>   2 files changed, 152 insertions(+)
>>
>> diff --git a/include/net/xdp.h b/include/net/xdp.h
>> index 04c852c7a77f..ea5836ccee82 100644
>> --- a/include/net/xdp.h
>> +++ b/include/net/xdp.h
>> @@ -8,6 +8,151 @@
>>   #include <linux/skbuff.h> /* skb_shared_info */
>> +/**
>> + * struct xdp_hints_common - Common XDP-hints offloads shared with 
>> netstack
>> + * @btf_full_id: The modules BTF object + type ID for specific struct
>> + * @vlan_tci: Hardware provided VLAN tag + proto type in 
>> @xdp_hints_flags
>> + * @rx_hash32: Hardware provided RSS hash value
>> + * @xdp_hints_flags: see &enum xdp_hints_flags
>> + *
>> + * This structure contains the most commonly used hardware offloads 
>> hints
>> + * provided by NIC drivers and supported by the SKB.
>> + *
>> + * Driver are expected to extend this structure by include &struct
>> + * xdp_hints_common as part of the drivers own specific xdp_hints 
>> struct's, but
>> + * at the end-of their struct given XDP metadata area grows backwards.
>> + *
>> + * The member @btf_full_id is populated by driver modules to uniquely 
>> identify
>> + * the BTF struct.  The high 32-bits store the modules BTF object ID 
>> and the
>> + * lower 32-bit the BTF type ID within that BTF object.
>> + */
>> +struct xdp_hints_common {
>> +    union {
>> +        __wsum        csum;
>> +        struct {
>> +            __u16    csum_start;
>> +            __u16    csum_offset;
>> +        };
>> +    };
>> +    u16 rx_queue;
>> +    u16 vlan_tci;
>> +    u32 rx_hash32;
>> +    u32 xdp_hints_flags;
>> +    u64 btf_full_id; /* BTF object + type ID */
>> +} __attribute__((aligned(4))) __attribute__((packed));
> 
> I'm assuming any Tx metadata will have to go before the Rx checksum union?
> 

Nope.  The plan is that the TX metadata can reuse the same metadata area
with its own layout.  I imagine a new xdp_buff->flags bit that tell us
the layout is now TX-layout with xdp_hints_common_tx.

We could rename xdp_hints_common to xdp_hints_common_rx to anticipate
and prepare for this. But that would be getting a head of ourselves,
because someone in the community might have a smarter solution, e.g.
that could combine common RX and TX in a single struct. e.g. overlapping
csum and vlan_tci might make sense.

>> +
>> +
>> +/**
>> + * enum xdp_hints_flags - flags used by &struct xdp_hints_common
>> + *
>> + * The &enum xdp_hints_flags have reserved the first 16 bits for 
>> common flags
>> + * and drivers can introduce use their own flags bits from BIT(16). For
>> + * BPF-progs to find these flags (via BTF) drivers should define an enum
>> + * xdp_hints_flags_driver.
>> + */
>> +enum xdp_hints_flags {
>> +    HINT_FLAG_CSUM_TYPE_BIT0  = BIT(0),
>> +    HINT_FLAG_CSUM_TYPE_BIT1  = BIT(1),
>> +    HINT_FLAG_CSUM_TYPE_MASK  = 0x3,
>> +
>> +    HINT_FLAG_CSUM_LEVEL_BIT0 = BIT(2),
>> +    HINT_FLAG_CSUM_LEVEL_BIT1 = BIT(3),
>> +    HINT_FLAG_CSUM_LEVEL_MASK = 0xC,
>> +    HINT_FLAG_CSUM_LEVEL_SHIFT = 2,
>> +
>> +    HINT_FLAG_RX_HASH_TYPE_BIT0 = BIT(4),
>> +    HINT_FLAG_RX_HASH_TYPE_BIT1 = BIT(5),
>> +    HINT_FLAG_RX_HASH_TYPE_MASK = 0x30,
>> +    HINT_FLAG_RX_HASH_TYPE_SHIFT = 0x4,
>> +
>> +    HINT_FLAG_RX_QUEUE = BIT(7),
>> +
>> +    HINT_FLAG_VLAN_PRESENT            = BIT(8),
>> +    HINT_FLAG_VLAN_PROTO_ETH_P_8021Q  = BIT(9),
>> +    HINT_FLAG_VLAN_PROTO_ETH_P_8021AD = BIT(10),
>> +    /* Flags from BIT(16) can be used by drivers */
> 
> If we assumed we also have Tx section, would 16 bits be enough? For a 
> basic implementation of UDP checksumming, AF_XDP would need 3x16 more 
> bits (to store L2/L3/L4 offsets) plus probably a flag field indicating 
> presence of each. Is there any way to expand common fields in the future 
> (or is it at all intended to be expandable)?
> 

As above we could have separate flags for TX side, e.g.
xdp_hints_flags_tx.  But some of the flags might still be valid for
TX-side, so they could potentially share some.

BUT it is also important to realize that I'm saying this is not UAPI
flags being exposed (like in include/uapi/bpf.h).  The runtime value of
these enum defined flags MUST be obtained via BTF (through help of
libbpf CO-RE or in userspace by parsing BTF).

Thus, in principle the kernel is free to change these structs and enums.
In practice it will be very annoying for BPF-progs and AF_XDP userspace
code if we change the names of the struct's and somewhat annoying if
members change name.  CO-RE can deal with kernel changes and feature
detection[1] down to the avail enums e.g. via using
bpf_core_enum_value_exists().  But we should avoid too many changes as
the code becomes harder to read.

--Jesper

[1] 
https://nakryiko.com/posts/bpf-core-reference-guide/#bpf-core-enum-value-exists
diff mbox series

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 04c852c7a77f..ea5836ccee82 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -8,6 +8,151 @@ 
 
 #include <linux/skbuff.h> /* skb_shared_info */
 
+/**
+ * struct xdp_hints_common - Common XDP-hints offloads shared with netstack
+ * @btf_full_id: The modules BTF object + type ID for specific struct
+ * @vlan_tci: Hardware provided VLAN tag + proto type in @xdp_hints_flags
+ * @rx_hash32: Hardware provided RSS hash value
+ * @xdp_hints_flags: see &enum xdp_hints_flags
+ *
+ * This structure contains the most commonly used hardware offloads hints
+ * provided by NIC drivers and supported by the SKB.
+ *
+ * Driver are expected to extend this structure by include &struct
+ * xdp_hints_common as part of the drivers own specific xdp_hints struct's, but
+ * at the end-of their struct given XDP metadata area grows backwards.
+ *
+ * The member @btf_full_id is populated by driver modules to uniquely identify
+ * the BTF struct.  The high 32-bits store the modules BTF object ID and the
+ * lower 32-bit the BTF type ID within that BTF object.
+ */
+struct xdp_hints_common {
+	union {
+		__wsum		csum;
+		struct {
+			__u16	csum_start;
+			__u16	csum_offset;
+		};
+	};
+	u16 rx_queue;
+	u16 vlan_tci;
+	u32 rx_hash32;
+	u32 xdp_hints_flags;
+	u64 btf_full_id; /* BTF object + type ID */
+} __attribute__((aligned(4))) __attribute__((packed));
+
+
+/**
+ * enum xdp_hints_flags - flags used by &struct xdp_hints_common
+ *
+ * The &enum xdp_hints_flags have reserved the first 16 bits for common flags
+ * and drivers can introduce use their own flags bits from BIT(16). For
+ * BPF-progs to find these flags (via BTF) drivers should define an enum
+ * xdp_hints_flags_driver.
+ */
+enum xdp_hints_flags {
+	HINT_FLAG_CSUM_TYPE_BIT0  = BIT(0),
+	HINT_FLAG_CSUM_TYPE_BIT1  = BIT(1),
+	HINT_FLAG_CSUM_TYPE_MASK  = 0x3,
+
+	HINT_FLAG_CSUM_LEVEL_BIT0 = BIT(2),
+	HINT_FLAG_CSUM_LEVEL_BIT1 = BIT(3),
+	HINT_FLAG_CSUM_LEVEL_MASK = 0xC,
+	HINT_FLAG_CSUM_LEVEL_SHIFT = 2,
+
+	HINT_FLAG_RX_HASH_TYPE_BIT0 = BIT(4),
+	HINT_FLAG_RX_HASH_TYPE_BIT1 = BIT(5),
+	HINT_FLAG_RX_HASH_TYPE_MASK = 0x30,
+	HINT_FLAG_RX_HASH_TYPE_SHIFT = 0x4,
+
+	HINT_FLAG_RX_QUEUE = BIT(7),
+
+	HINT_FLAG_VLAN_PRESENT            = BIT(8),
+	HINT_FLAG_VLAN_PROTO_ETH_P_8021Q  = BIT(9),
+	HINT_FLAG_VLAN_PROTO_ETH_P_8021AD = BIT(10),
+	/* Flags from BIT(16) can be used by drivers */
+};
+
+/**
+ * enum xdp_hints_csum_type - BTF exposing checksum defines
+ *
+ * This enum is primarily for BTF exposing ``CHECKSUM_*`` defines (as an enum)
+ * used by &struct skb->ip_summed (see Documentation/networking/skbuff.rst
+ * section "Checksum information").
+ *
+ * These values are stored in &enum xdp_hints_flags as bit locations
+ * ``HINT_FLAG_CSUM_TYPE_BIT*``
+ */
+enum xdp_hints_csum_type {
+	HINT_CHECKSUM_NONE        = CHECKSUM_NONE,
+	HINT_CHECKSUM_UNNECESSARY = CHECKSUM_UNNECESSARY,
+	HINT_CHECKSUM_COMPLETE    = CHECKSUM_COMPLETE,
+	HINT_CHECKSUM_PARTIAL     = CHECKSUM_PARTIAL,
+};
+
+/** DOC: XDP hints driver helpers
+ *
+ * Helpers for drivers updating struct xdp_hints_common.
+ *
+ * Avoid creating a data dependency on xdp_hints_flags via returning the flags
+ * that need to be set.  Drivers MUST update the xdp_hints_flags member
+ * themselves, which allows drivers to construct code with less data dependency
+ * between instructions by OR'ing the final flags together.
+ */
+
+/* Drivers please use this simple helper to ease changes across drives */
+static __always_inline void xdp_hints_set_flags(struct xdp_hints_common *hints,
+						u32 flags)
+{
+	hints->xdp_hints_flags = flags;
+}
+
+static __always_inline u32 xdp_hints_set_rx_csum(
+	struct xdp_hints_common *hints,
+	u16 type, u16 level)
+{
+	u32 flags;
+
+	flags = type & HINT_FLAG_CSUM_TYPE_MASK;
+	flags |= (level << HINT_FLAG_CSUM_LEVEL_SHIFT)
+		& HINT_FLAG_CSUM_LEVEL_MASK;
+
+	// TODO: handle CHECKSUM_PARTIAL and COMPLETE (needs updating *hints)
+	return flags;
+}
+
+/* @type	Must be &enum enum pkt_hash_types (PKT_HASH_TYPE_*) */
+static __always_inline u32 xdp_hints_set_rx_hash(
+	struct xdp_hints_common *hints,
+	u32 hash, u32 type)
+{
+	hints->rx_hash32 = hash;
+	return (type << HINT_FLAG_RX_HASH_TYPE_SHIFT) &
+		HINT_FLAG_RX_HASH_TYPE_MASK;
+}
+
+static __always_inline u32 xdp_hints_set_rxq(struct xdp_hints_common *hints,
+					     u16 q_idx)
+{
+	hints->rx_queue = q_idx;
+	return HINT_FLAG_RX_QUEUE;
+}
+
+/* @proto	Must be ETH_P_8021Q or ETH_P_8021AD in network order */
+static __always_inline u32 xdp_hints_set_vlan(struct xdp_hints_common *hints,
+					      u16 vlan_tag, const u16 proto)
+{
+	u32 flags = HINT_FLAG_VLAN_PRESENT;
+
+	hints->vlan_tci = vlan_tag;
+	if (proto == htons(ETH_P_8021Q))
+		flags |= HINT_FLAG_VLAN_PROTO_ETH_P_8021Q;
+	if (proto == htons(ETH_P_8021AD))
+		flags |= HINT_FLAG_VLAN_PROTO_ETH_P_8021AD;
+
+	return flags;
+}
+
 /**
  * DOC: XDP RX-queue information
  *
@@ -72,6 +217,8 @@  enum xdp_buff_flags {
 	XDP_FLAGS_FRAGS_PF_MEMALLOC	= BIT(1), /* xdp paged memory is under
 						   * pressure
 						   */
+	XDP_FLAGS_HAS_HINTS		= BIT(2),
+	XDP_FLAGS_HINTS_COMPAT_COMMON	= BIT(3),
 };
 
 struct xdp_buff {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 24420209bf0e..a57bd5278b47 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -33,6 +33,11 @@  static int mem_id_next = MEM_ID_MIN;
 static bool mem_id_init; /* false */
 static struct rhashtable *mem_id_ht;
 
+/* Make xdp_hints part of core vmlinux BTF */
+struct xdp_hints_common  xdp_hints_common;
+enum xdp_hints_flags     xdp_hints_flags;
+enum xdp_hints_csum_type xdp_hints_csum_type;
+
 static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
 {
 	const u32 *k = data;