
[net-next,v9,7/9] libeth: add Rx buffer management

Message ID 20240404154402.3581254-8-aleksander.lobakin@intel.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series net: intel: start The Great Code Dedup + Page Pool for iavf

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 943 this patch: 945
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: jesse.brandeburg@intel.com anthony.l.nguyen@intel.com
netdev/build_clang success Errors and warnings before: 954 this patch: 954
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 954 this patch: 956
netdev/checkpatch warning CHECK: Alignment should match open parenthesis
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 0 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Alexander Lobakin April 4, 2024, 3:44 p.m. UTC
Add a couple of intuitive helpers to hide Rx buffer implementation
details in the library and avoid duplicating them between drivers. The
settings are somewhat optimized for 100G+ NICs, but nothing here is
really HW-specific.
Use the new page_pool_dev_alloc() to dynamically switch between
split-page and full-page modes depending on MTU, page size, required
headroom etc. For example, on x86_64 with the default driver settings
each page is shared between 2 buffers. Turning on XDP (not in this
series) increases the headroom requirement and pushes truesize past the
2048-byte boundary, at which point each buffer starts getting a full
page.
The "ceiling" limit is %PAGE_SIZE, as only order-0 pages are used to
avoid compound-page overhead. For the above architecture, this means a
maximum linear frame size of 3712 bytes w/o XDP.
Note that &libeth_fq is not a complete queue/ring structure for now,
rather a shim, but eventually the libeth-enabled drivers will move to
it, with iavf being the first one.
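
For reference, the sizing numbers above can be checked with a rough
user-space sketch of the math. It is not part of the patch; the
constants are assumed x86_64 defaults (4-KiB pages, 64-byte cachelines,
~320-byte struct skb_shared_info, NET_IP_ALIGN of 0) and the XDP case
assumes the usual 256-byte XDP_PACKET_HEADROOM:

#include <stdio.h>

/* Assumed x86_64 defaults -- not taken from the patch itself */
#define PAGE_SIZE		4096u
#define SMP_CACHE_BYTES		64u
#define NET_SKB_PAD		64u	/* max(32, L1 cacheline size) */
#define NET_IP_ALIGN		0u	/* 0 on x86 */
#define SKB_SHINFO_SIZE		320u	/* approx. sizeof(struct skb_shared_info) */
#define XDP_PACKET_HEADROOM	256u

/* Userspace mirrors of the kernel/libeth macros used by this patch */
#define ALIGN(x, a)		(((x) + (a) - 1) / (a) * (a))
#define ALIGN_DOWN(x, a)	((x) / (a) * (a))
#define SKB_DATA_ALIGN(x)	ALIGN((x), SMP_CACHE_BYTES)
#define LIBETH_RX_LL_LEN	(14u + 2u * 4u + 4u)	/* Eth + 2 VLANs + FCS */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128u)

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int r = 1;

	while (r < x)
		r <<= 1;

	return r;
}

static void sizes(const char *name, unsigned int headroom, unsigned int mtu)
{
	/* pp.max_len: HW-writeable part of one order-0 page */
	unsigned int max_len = ALIGN_DOWN(PAGE_SIZE - headroom -
					  SKB_DATA_ALIGN(SKB_SHINFO_SIZE),
					  LIBETH_RX_BUF_STRIDE);
	/* libeth_rx_hw_len(): HW-writeable length per buffer */
	unsigned int buf_len = ALIGN(mtu + LIBETH_RX_LL_LEN,
				     LIBETH_RX_BUF_STRIDE);
	unsigned int truesize;

	if (buf_len > max_len)
		buf_len = max_len;

	/* libeth_rx_fq_create(): space to allocate per buffer */
	truesize = roundup_pow_of_two(SKB_DATA_ALIGN(headroom + buf_len) +
				      SKB_DATA_ALIGN(SKB_SHINFO_SIZE));

	printf("%-7s max_len %u, buf_len %u, truesize %u -> %u buffer(s) per page\n",
	       name, max_len, buf_len, truesize, PAGE_SIZE / truesize);
}

int main(void)
{
	sizes("no XDP", NET_SKB_PAD + NET_IP_ALIGN, 1500);
	sizes("XDP", XDP_PACKET_HEADROOM + NET_IP_ALIGN, 1500);

	return 0;
}

With these assumptions it prints max_len 3712 and truesize 2048 (two
buffers per page) for the non-XDP case, and truesize rounds up to a
full 4096-byte page once the 256-byte XDP headroom is used, matching
the numbers in the message above.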

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 drivers/net/ethernet/intel/libeth/Kconfig |   1 +
 include/net/libeth/rx.h                   | 117 ++++++++++++++++++++++
 drivers/net/ethernet/intel/libeth/rx.c    |  98 ++++++++++++++++++
 3 files changed, 216 insertions(+)

Comments

Przemek Kitszel April 5, 2024, 10:32 a.m. UTC | #1
On 4/4/24 17:44, Alexander Lobakin wrote:
> Add a couple intuitive helpers to hide Rx buffer implementation details
> in the library and not multiplicate it between drivers. The settings are
> sorta optimized for 100G+ NICs, but nothing really HW-specific here.
> Use the new page_pool_dev_alloc() to dynamically switch between
> split-page and full-page modes depending on MTU, page size, required
> headroom etc. For example, on x86_64 with the default driver settings
> each page is shared between 2 buffers. Turning on XDP (not in this
> series) -> increasing headroom requirement pushes truesize out of 2048
> boundary, leading to that each buffer starts getting a full page.
> The "ceiling" limit is %PAGE_SIZE, as only order-0 pages are used to
> avoid compound overhead. For the above architecture, this means maximum
> linear frame size of 3712 w/o XDP.
> Not that &libeth_buf_queue is not a complete queue/ring structure for
> now, rather a shim, but eventually the libeth-enabled drivers will move
> to it, with iavf being the first one.
> 
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
> ---
>   drivers/net/ethernet/intel/libeth/Kconfig |   1 +
>   include/net/libeth/rx.h                   | 117 ++++++++++++++++++++++
>   drivers/net/ethernet/intel/libeth/rx.c    |  98 ++++++++++++++++++
>   3 files changed, 216 insertions(+)
>
[...]

> +/**
> + * struct libeth_fqe - structure representing an Rx buffer
> + * @page: page holding the buffer
> + * @offset: offset from the page start (to the headroom)
> + * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
> + *
> + * Depending on the MTU, API switches between one-page-per-frame and shared
> + * page model (to conserve memory on bigger-page platforms). In case of the
> + * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
> + */
> +struct libeth_fqe {
> +	struct page		*page;
> +	u32			offset;
> +	u32			truesize;
> +} __aligned_largest;
> +
> +/**
> + * struct libeth_fq - structure representing a buffer queue
> + * @fp: hotpath part of the structure
> + * @pp: &page_pool for buffer management
> + * @fqes: array of Rx buffers
> + * @truesize: size to allocate per buffer, w/overhead
> + * @count: number of descriptors/buffers the queue has
> + * @buf_len: HW-writeable length per each buffer
> + * @nid: ID of the closest NUMA node with memory
> + */
> +struct libeth_fq {
> +	struct_group_tagged(libeth_fq_fp, fp,
> +		struct page_pool	*pp;
> +		struct libeth_fqe	*fqes;
> +
> +		u32			truesize;
> +		u32			count;
> +	);
> +
> +	/* Cold fields */
> +	u32			buf_len;
> +	int			nid;
> +};

[...]

Could you please unpack the meaning of `fq` and `fqe` acronyms here?

otherwise the whole series is very good for me, thank you very much!
Jakub Kicinski April 6, 2024, 4:25 a.m. UTC | #2
On Thu,  4 Apr 2024 17:44:00 +0200 Alexander Lobakin wrote:
> +/**
> + * struct libeth_fq - structure representing a buffer queue
> + * @fp: hotpath part of the structure

Second time this happens this week, so maybe some tooling change in 6.9
but apparently kdoc does not want to know about the tagged struct:

include/net/libeth/rx.h:69: warning: Excess struct member 'fp' description in 'libeth_fq'

> + * @pp: &page_pool for buffer management
> + * @fqes: array of Rx buffers
> + * @truesize: size to allocate per buffer, w/overhead
> + * @count: number of descriptors/buffers the queue has
> + * @buf_len: HW-writeable length per each buffer
> + * @nid: ID of the closest NUMA node with memory
> + */
> +struct libeth_fq {
> +	struct_group_tagged(libeth_fq_fp, fp,
> +		struct page_pool	*pp;
> +		struct libeth_fqe	*fqes;
> +
> +		u32			truesize;
> +		u32			count;
> +	);
> +
> +	/* Cold fields */
> +	u32			buf_len;
> +	int			nid;
> +};
Alexander Lobakin April 8, 2024, 9:09 a.m. UTC | #3
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Fri, 5 Apr 2024 12:32:55 +0200

> On 4/4/24 17:44, Alexander Lobakin wrote:
>> Add a couple intuitive helpers to hide Rx buffer implementation details

[...]

>> +struct libeth_fqe {
>> +    struct page        *page;
>> +    u32            offset;
>> +    u32            truesize;
>> +} __aligned_largest;
>> +
>> +/**
>> + * struct libeth_fq - structure representing a buffer queue
>> + * @fp: hotpath part of the structure
>> + * @pp: &page_pool for buffer management
>> + * @fqes: array of Rx buffers
>> + * @truesize: size to allocate per buffer, w/overhead
>> + * @count: number of descriptors/buffers the queue has
>> + * @buf_len: HW-writeable length per each buffer
>> + * @nid: ID of the closest NUMA node with memory
>> + */
>> +struct libeth_fq {
>> +    struct_group_tagged(libeth_fq_fp, fp,
>> +        struct page_pool    *pp;
>> +        struct libeth_fqe    *fqes;
>> +
>> +        u32            truesize;
>> +        u32            count;
>> +    );
>> +
>> +    /* Cold fields */
>> +    u32            buf_len;
>> +    int            nid;
>> +};
> 
> [...]
> 
> Could you please unpack the meaning of `fq` and `fqe` acronyms here?

Rx:

RQ -- receive queue, on which you get Rx DMA complete descriptors
FQ -- fill queue, the one you fill with free buffers
  FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or whatever

Tx:

SQ -- send queue, the one you fill with buffers to transmit
  SQE -- send queue element, i.e. "iavf_tx_buffer"
CQ -- completion queue, on which you get Tx DMA complete descriptors

XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk

I know that rxq, txq, bufq, complq is more common since it's been used
for years, but I like these "new" ones more :>

> 
> otherwise the whole series is very good for me, thank you very much!
> 

Thanks,
Olek
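
(For reference, the glossary above could be captured as a block comment
at the top of include/net/libeth/rx.h, roughly like the sketch below --
this is only an illustration of what gets requested later in the
thread, not code from the series:)

/* Queue naming used by libeth:
 *
 * Rx:
 *   RQ  -- receive queue, on which Rx DMA completion descriptors land
 *   FQ  -- fill queue, which the driver fills with free buffers
 *   FQE -- fill queue element, a single Rx buffer (what used to be
 *          e.g. iavf_rx_buffer)
 *
 * Tx:
 *   SQ  -- send queue, filled with buffers to transmit
 *   SQE -- send queue element (e.g. iavf_tx_buffer)
 *   CQ  -- completion queue, on which Tx DMA completion descriptors land
 *
 * XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk.
 */
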
Alexander Lobakin April 8, 2024, 9:11 a.m. UTC | #4
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 5 Apr 2024 21:25:13 -0700

> On Thu,  4 Apr 2024 17:44:00 +0200 Alexander Lobakin wrote:
>> +/**
>> + * struct libeth_fq - structure representing a buffer queue
>> + * @fp: hotpath part of the structure
> 
> Second time this happens this week, so maybe some tooling change in 6.9
> but apparently kdoc does not want to know about the tagged struct:
> 
> include/net/libeth/rx.h:69: warning: Excess struct member 'fp' description in 'libeth_fq'

Oh no, maybe we should teach kdoc to parse struct_group*()?

> 
>> + * @pp: &page_pool for buffer management
>> + * @fqes: array of Rx buffers
>> + * @truesize: size to allocate per buffer, w/overhead
>> + * @count: number of descriptors/buffers the queue has
>> + * @buf_len: HW-writeable length per each buffer
>> + * @nid: ID of the closest NUMA node with memory
>> + */
>> +struct libeth_fq {
>> +	struct_group_tagged(libeth_fq_fp, fp,
>> +		struct page_pool	*pp;
>> +		struct libeth_fqe	*fqes;
>> +
>> +		u32			truesize;
>> +		u32			count;
>> +	);
>> +
>> +	/* Cold fields */
>> +	u32			buf_len;
>> +	int			nid;
>> +};

Thanks,
Olek
Alexander Lobakin April 8, 2024, 9:45 a.m. UTC | #5
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Mon, 8 Apr 2024 11:11:12 +0200

> From: Jakub Kicinski <kuba@kernel.org>
> Date: Fri, 5 Apr 2024 21:25:13 -0700
> 
>> On Thu,  4 Apr 2024 17:44:00 +0200 Alexander Lobakin wrote:
>>> +/**
>>> + * struct libeth_fq - structure representing a buffer queue
>>> + * @fp: hotpath part of the structure
>>
>> Second time this happens this week, so maybe some tooling change in 6.9
>> but apparently kdoc does not want to know about the tagged struct:
>>
>> include/net/libeth/rx.h:69: warning: Excess struct member 'fp' description in 'libeth_fq'
> 
> Oh no, maybe we should teach kdoc to parse struct_group*()?

scripts/kernel-doc apparently can handle them...

+ Kees

> 
>>
>>> + * @pp: &page_pool for buffer management
>>> + * @fqes: array of Rx buffers
>>> + * @truesize: size to allocate per buffer, w/overhead
>>> + * @count: number of descriptors/buffers the queue has
>>> + * @buf_len: HW-writeable length per each buffer
>>> + * @nid: ID of the closest NUMA node with memory
>>> + */
>>> +struct libeth_fq {
>>> +	struct_group_tagged(libeth_fq_fp, fp,
>>> +		struct page_pool	*pp;
>>> +		struct libeth_fqe	*fqes;
>>> +
>>> +		u32			truesize;
>>> +		u32			count;
>>> +	);
>>> +
>>> +	/* Cold fields */
>>> +	u32			buf_len;
>>> +	int			nid;
>>> +};

Thanks,
Olek
Przemek Kitszel April 9, 2024, 10:58 a.m. UTC | #6
On 4/8/24 11:09, Alexander Lobakin wrote:
> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> Date: Fri, 5 Apr 2024 12:32:55 +0200
> 
>> On 4/4/24 17:44, Alexander Lobakin wrote:
>>> Add a couple intuitive helpers to hide Rx buffer implementation details
> 
> [...]
> 
>>> +struct libeth_fqe {
>>> +    struct page        *page;
>>> +    u32            offset;
>>> +    u32            truesize;
>>> +} __aligned_largest;
>>> +
>>> +/**
>>> + * struct libeth_fq - structure representing a buffer queue
>>> + * @fp: hotpath part of the structure
>>> + * @pp: &page_pool for buffer management
>>> + * @fqes: array of Rx buffers
>>> + * @truesize: size to allocate per buffer, w/overhead
>>> + * @count: number of descriptors/buffers the queue has
>>> + * @buf_len: HW-writeable length per each buffer
>>> + * @nid: ID of the closest NUMA node with memory
>>> + */
>>> +struct libeth_fq {
>>> +    struct_group_tagged(libeth_fq_fp, fp,
>>> +        struct page_pool    *pp;
>>> +        struct libeth_fqe    *fqes;
>>> +
>>> +        u32            truesize;
>>> +        u32            count;
>>> +    );
>>> +
>>> +    /* Cold fields */
>>> +    u32            buf_len;
>>> +    int            nid;
>>> +};
>>
>> [...]
>>
>> Could you please unpack the meaning of `fq` and `fqe` acronyms here?
> 
> Rx:
> 
> RQ -- receive queue, on which you get Rx DMA complete descriptors
> FQ -- fill queue, the one you fill with free buffers
>    FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or whatever
> 
> Tx:
> 
> SQ -- send queue, the one you fill with buffers to transmit
>    SQE -- send queue element, i.e. "iavf_tx_buffer"
> CQ -- completion queue, on which you get Tx DMA complete descriptors
> 
> XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk
> 
> I know that rxq, txq, bufq, complq is more common since it's been used
> for years, but I like these "new" ones more :>
> 

Thank you, that sounds right. If you happen to send v10, a bit of a code
comment with this info would be useful ;)

>>
>> otherwise the whole series is very good for me, thank you very much!
>>
> 
> Thanks,
> Olek
Kees Cook April 9, 2024, 4:17 p.m. UTC | #7
On Mon, Apr 08, 2024 at 11:45:32AM +0200, Alexander Lobakin wrote:
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> Date: Mon, 8 Apr 2024 11:11:12 +0200
> 
> > From: Jakub Kicinski <kuba@kernel.org>
> > Date: Fri, 5 Apr 2024 21:25:13 -0700
> > 
> >> On Thu,  4 Apr 2024 17:44:00 +0200 Alexander Lobakin wrote:
> >>> +/**
> >>> + * struct libeth_fq - structure representing a buffer queue
> >>> + * @fp: hotpath part of the structure
> >>
> >> Second time this happens this week, so maybe some tooling change in 6.9
> >> but apparently kdoc does not want to know about the tagged struct:
> >>
> >> include/net/libeth/rx.h:69: warning: Excess struct member 'fp' description in 'libeth_fq'
> > 
> > Oh no, maybe we should teach kdoc to parse struct_group*()?
> 
> scripts/kernel-doc apparently can handle them...
> 
> + Kees
> 

Ah, hm, scripts/kernel-doc throws away the early arguments of
struct_group_tagged, but I suspect it needs to create a synthetic member
for the tag. i.e. instead of:

	struct_group_tagged(tag, name, members...)

becoming

	members...

it needs to become

	struct tag name;
	members...

It seems this is the first place anyone has tried to document the tagged
struct name! :)
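
(Concretely, for the struct in this patch that would mean kernel-doc
treating libeth_fq as if the tag were a real member -- an illustration
of the intended parse, not actual macro output:)

/* Today kernel-doc only sees the group members (pp, fqes, truesize,
 * count) plus the cold fields, so the "@fp:" line has nothing to attach
 * to.  With the synthetic member it would effectively parse:
 */
struct libeth_fq {
	struct libeth_fq_fp	fp;	/* synthetic member standing in for the tag */

	struct page_pool	*pp;
	struct libeth_fqe	*fqes;

	u32			truesize;
	u32			count;

	/* Cold fields */
	u32			buf_len;
	int			nid;
};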

Does this work? I haven't tested it...

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 967f1abb0edb..64a19228d5dd 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1151,7 +1151,8 @@ sub dump_struct($$) {
         # - first eat non-declaration parameters and rewrite for final match
         # - then remove macro, outer parens, and trailing semicolon
         $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
-        $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
+        $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
+        $members =~ s/\bstruct_group_tagged\s*\(([^,]*,)([^,]*,)/struct $1 $2; STRUCT_GROUP(/gos;
         $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
         $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
 

> > 
> >>
> >>> + * @pp: &page_pool for buffer management
> >>> + * @fqes: array of Rx buffers
> >>> + * @truesize: size to allocate per buffer, w/overhead
> >>> + * @count: number of descriptors/buffers the queue has
> >>> + * @buf_len: HW-writeable length per each buffer
> >>> + * @nid: ID of the closest NUMA node with memory
> >>> + */
> >>> +struct libeth_fq {
> >>> +	struct_group_tagged(libeth_fq_fp, fp,
> >>> +		struct page_pool	*pp;
> >>> +		struct libeth_fqe	*fqes;
> >>> +
> >>> +		u32			truesize;
> >>> +		u32			count;
> >>> +	);
> >>> +
> >>> +	/* Cold fields */
> >>> +	u32			buf_len;
> >>> +	int			nid;
> >>> +};
> 
> Thanks,
> Olek
Alexander Lobakin April 10, 2024, 11:49 a.m. UTC | #8
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Tue, 9 Apr 2024 12:58:33 +0200

> On 4/8/24 11:09, Alexander Lobakin wrote:
>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>> Date: Fri, 5 Apr 2024 12:32:55 +0200
>>
>>> On 4/4/24 17:44, Alexander Lobakin wrote:
>>>> Add a couple intuitive helpers to hide Rx buffer implementation details
>>
>> [...]
>>
>>>> +struct libeth_fqe {
>>>> +    struct page        *page;
>>>> +    u32            offset;
>>>> +    u32            truesize;
>>>> +} __aligned_largest;
>>>> +
>>>> +/**
>>>> + * struct libeth_fq - structure representing a buffer queue
>>>> + * @fp: hotpath part of the structure
>>>> + * @pp: &page_pool for buffer management
>>>> + * @fqes: array of Rx buffers
>>>> + * @truesize: size to allocate per buffer, w/overhead
>>>> + * @count: number of descriptors/buffers the queue has
>>>> + * @buf_len: HW-writeable length per each buffer
>>>> + * @nid: ID of the closest NUMA node with memory
>>>> + */
>>>> +struct libeth_fq {
>>>> +    struct_group_tagged(libeth_fq_fp, fp,
>>>> +        struct page_pool    *pp;
>>>> +        struct libeth_fqe    *fqes;
>>>> +
>>>> +        u32            truesize;
>>>> +        u32            count;
>>>> +    );
>>>> +
>>>> +    /* Cold fields */
>>>> +    u32            buf_len;
>>>> +    int            nid;
>>>> +};
>>>
>>> [...]
>>>
>>> Could you please unpack the meaning of `fq` and `fqe` acronyms here?
>>
>> Rx:
>>
>> RQ -- receive queue, on which you get Rx DMA complete descriptors
>> FQ -- fill queue, the one you fill with free buffers
>>    FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or whatever
>>
>> Tx:
>>
>> SQ -- send queue, the one you fill with buffers to transmit
>>    SQE -- send queue element, i.e. "iavf_tx_buffer"
>> CQ -- completion queue, on which you get Tx DMA complete descriptors
>>
>> XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk
>>
>> I know that rxq, txq, bufq, complq is more common since it's been used
>> for years, but I like these "new" ones more :>
>>
> 
> Thank you, that sounds right. If you happen to sent v10, a bit of code
> comment with this info would be useful ;)

The current kdoc in front of each struct and function declaration is not
enough? :D

Thanks,
Olek
Alexander Lobakin April 10, 2024, 1:01 p.m. UTC | #9
From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Date: Wed, 10 Apr 2024 15:01:32 +0200

> On 4/10/24 13:49, Alexander Lobakin wrote:
>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>> Date: Tue, 9 Apr 2024 12:58:33 +0200
>>
>>> On 4/8/24 11:09, Alexander Lobakin wrote:
>>>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>>>> Date: Fri, 5 Apr 2024 12:32:55 +0200
>>>>
>>>>> On 4/4/24 17:44, Alexander Lobakin wrote:
>>>>>> Add a couple intuitive helpers to hide Rx buffer implementation
>>>>>> details
>>>>
>>>> [...]
>>>>
>>>>>> +struct libeth_fqe {
>>>>>> +    struct page        *page;
>>>>>> +    u32            offset;
>>>>>> +    u32            truesize;
>>>>>> +} __aligned_largest;
>>>>>> +
>>>>>> +/**
>>>>>> + * struct libeth_fq - structure representing a buffer queue
>>>>>> + * @fp: hotpath part of the structure
>>>>>> + * @pp: &page_pool for buffer management
>>>>>> + * @fqes: array of Rx buffers
>>>>>> + * @truesize: size to allocate per buffer, w/overhead
>>>>>> + * @count: number of descriptors/buffers the queue has
>>>>>> + * @buf_len: HW-writeable length per each buffer
>>>>>> + * @nid: ID of the closest NUMA node with memory
>>>>>> + */
>>>>>> +struct libeth_fq {
>>>>>> +    struct_group_tagged(libeth_fq_fp, fp,
>>>>>> +        struct page_pool    *pp;
>>>>>> +        struct libeth_fqe    *fqes;
>>>>>> +
>>>>>> +        u32            truesize;
>>>>>> +        u32            count;
>>>>>> +    );
>>>>>> +
>>>>>> +    /* Cold fields */
>>>>>> +    u32            buf_len;
>>>>>> +    int            nid;
>>>>>> +};
>>>>>
>>>>> [...]
>>>>>
>>>>> Could you please unpack the meaning of `fq` and `fqe` acronyms here?
>>>>
>>>> Rx:
>>>>
>>>> RQ -- receive queue, on which you get Rx DMA complete descriptors
>>>> FQ -- fill queue, the one you fill with free buffers
>>>>     FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or
>>>> whatever
>>>>
>>>> Tx:
>>>>
>>>> SQ -- send queue, the one you fill with buffers to transmit
>>>>     SQE -- send queue element, i.e. "iavf_tx_buffer"
>>>> CQ -- completion queue, on which you get Tx DMA complete descriptors
>>>>
>>>> XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk
>>>>
>>>> I know that rxq, txq, bufq, complq is more common since it's been used
>>>> for years, but I like these "new" ones more :>
>>>>
>>>
>>> Thank you, that sounds right. If you happen to sent v10, a bit of code
>>> comment with this info would be useful ;)
>>
>> The current kdoc in front of each struct and function declaration is not
>> enough? :D
>>
>> Thanks,
>> Olek
> 
> I've asked my initial question just after reading it thrice, without
> your reply `FQE` was as meaningful as `ABC`

In the commit:

+ * struct libeth_fqe - structure representing an Rx buffer

:z

Or do you want me to expand the FQ, FQE etc. abbreviations in the kdoc?

Thanks,
Olek
Przemek Kitszel April 10, 2024, 1:01 p.m. UTC | #10
On 4/10/24 13:49, Alexander Lobakin wrote:
> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> Date: Tue, 9 Apr 2024 12:58:33 +0200
> 
>> On 4/8/24 11:09, Alexander Lobakin wrote:
>>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>>> Date: Fri, 5 Apr 2024 12:32:55 +0200
>>>
>>>> On 4/4/24 17:44, Alexander Lobakin wrote:
>>>>> Add a couple intuitive helpers to hide Rx buffer implementation details
>>>
>>> [...]
>>>
>>>>> +struct libeth_fqe {
>>>>> +    struct page        *page;
>>>>> +    u32            offset;
>>>>> +    u32            truesize;
>>>>> +} __aligned_largest;
>>>>> +
>>>>> +/**
>>>>> + * struct libeth_fq - structure representing a buffer queue
>>>>> + * @fp: hotpath part of the structure
>>>>> + * @pp: &page_pool for buffer management
>>>>> + * @fqes: array of Rx buffers
>>>>> + * @truesize: size to allocate per buffer, w/overhead
>>>>> + * @count: number of descriptors/buffers the queue has
>>>>> + * @buf_len: HW-writeable length per each buffer
>>>>> + * @nid: ID of the closest NUMA node with memory
>>>>> + */
>>>>> +struct libeth_fq {
>>>>> +    struct_group_tagged(libeth_fq_fp, fp,
>>>>> +        struct page_pool    *pp;
>>>>> +        struct libeth_fqe    *fqes;
>>>>> +
>>>>> +        u32            truesize;
>>>>> +        u32            count;
>>>>> +    );
>>>>> +
>>>>> +    /* Cold fields */
>>>>> +    u32            buf_len;
>>>>> +    int            nid;
>>>>> +};
>>>>
>>>> [...]
>>>>
>>>> Could you please unpack the meaning of `fq` and `fqe` acronyms here?
>>>
>>> Rx:
>>>
>>> RQ -- receive queue, on which you get Rx DMA complete descriptors
>>> FQ -- fill queue, the one you fill with free buffers
>>>     FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or whatever
>>>
>>> Tx:
>>>
>>> SQ -- send queue, the one you fill with buffers to transmit
>>>     SQE -- send queue element, i.e. "iavf_tx_buffer"
>>> CQ -- completion queue, on which you get Tx DMA complete descriptors
>>>
>>> XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk
>>>
>>> I know that rxq, txq, bufq, complq is more common since it's been used
>>> for years, but I like these "new" ones more :>
>>>
>>
>> Thank you, that sounds right. If you happen to sent v10, a bit of code
>> comment with this info would be useful ;)
> 
> The current kdoc in front of each struct and function declaration is not
> enough? :D
> 
> Thanks,
> Olek

I asked my initial question only after reading it thrice; without
your reply, `FQE` was as meaningful as `ABC`.
Przemek Kitszel April 10, 2024, 1:12 p.m. UTC | #11
On 4/10/24 15:01, Alexander Lobakin wrote:
> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
> Date: Wed, 10 Apr 2024 15:01:32 +0200
> 
>> On 4/10/24 13:49, Alexander Lobakin wrote:
>>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>>> Date: Tue, 9 Apr 2024 12:58:33 +0200
>>>
>>>> On 4/8/24 11:09, Alexander Lobakin wrote:
>>>>> From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
>>>>> Date: Fri, 5 Apr 2024 12:32:55 +0200
>>>>>
>>>>>> On 4/4/24 17:44, Alexander Lobakin wrote:
>>>>>>> Add a couple intuitive helpers to hide Rx buffer implementation
>>>>>>> details
>>>>>
>>>>> [...]
>>>>>
>>>>>>> +struct libeth_fqe {
>>>>>>> +    struct page        *page;
>>>>>>> +    u32            offset;
>>>>>>> +    u32            truesize;
>>>>>>> +} __aligned_largest;
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * struct libeth_fq - structure representing a buffer queue
>>>>>>> + * @fp: hotpath part of the structure
>>>>>>> + * @pp: &page_pool for buffer management
>>>>>>> + * @fqes: array of Rx buffers
>>>>>>> + * @truesize: size to allocate per buffer, w/overhead
>>>>>>> + * @count: number of descriptors/buffers the queue has
>>>>>>> + * @buf_len: HW-writeable length per each buffer
>>>>>>> + * @nid: ID of the closest NUMA node with memory
>>>>>>> + */
>>>>>>> +struct libeth_fq {
>>>>>>> +    struct_group_tagged(libeth_fq_fp, fp,
>>>>>>> +        struct page_pool    *pp;
>>>>>>> +        struct libeth_fqe    *fqes;
>>>>>>> +
>>>>>>> +        u32            truesize;
>>>>>>> +        u32            count;
>>>>>>> +    );
>>>>>>> +
>>>>>>> +    /* Cold fields */
>>>>>>> +    u32            buf_len;
>>>>>>> +    int            nid;
>>>>>>> +};
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>> Could you please unpack the meaning of `fq` and `fqe` acronyms here?
>>>>>
>>>>> Rx:
>>>>>
>>>>> RQ -- receive queue, on which you get Rx DMA complete descriptors
>>>>> FQ -- fill queue, the one you fill with free buffers
>>>>>      FQE -- fill queue element, i.e. smth like "iavf_rx_buffer" or
>>>>> whatever
>>>>>
>>>>> Tx:
>>>>>
>>>>> SQ -- send queue, the one you fill with buffers to transmit
>>>>>      SQE -- send queue element, i.e. "iavf_tx_buffer"
>>>>> CQ -- completion queue, on which you get Tx DMA complete descriptors
>>>>>
>>>>> XDPSQ, XSkRQ etc. -- same as above, but for XDP / XSk
>>>>>
>>>>> I know that rxq, txq, bufq, complq is more common since it's been used
>>>>> for years, but I like these "new" ones more :>
>>>>>
>>>>
>>>> Thank you, that sounds right. If you happen to sent v10, a bit of code
>>>> comment with this info would be useful ;)
>>>
>>> The current kdoc in front of each struct and function declaration is not
>>> enough? :D
>>>
>>> Thanks,
>>> Olek
>>
>> I've asked my initial question just after reading it thrice, without
>> your reply `FQE` was as meaningful as `ABC`
> 
> In the commit:
> 
> + * struct libeth_fqe - structure representing an Rx buffer
> 
> :z
> 
> or you want me to expand FQ, FQE etc. abbrevs in the kdoc?

yes, please very much!

It could be an additional comment at the top of the file too; with
that you could reuse the naming convention later in the file.

> 
> Thanks,
> Olek
Alexander Lobakin April 10, 2024, 1:36 p.m. UTC | #12
From: Kees Cook <keescook@chromium.org>
Date: Tue, 9 Apr 2024 09:17:53 -0700

> On Mon, Apr 08, 2024 at 11:45:32AM +0200, Alexander Lobakin wrote:
>> From: Alexander Lobakin <aleksander.lobakin@intel.com>
>> Date: Mon, 8 Apr 2024 11:11:12 +0200
>>
>>> From: Jakub Kicinski <kuba@kernel.org>
>>> Date: Fri, 5 Apr 2024 21:25:13 -0700
>>>
>>>> On Thu,  4 Apr 2024 17:44:00 +0200 Alexander Lobakin wrote:
>>>>> +/**
>>>>> + * struct libeth_fq - structure representing a buffer queue
>>>>> + * @fp: hotpath part of the structure
>>>>
>>>> Second time this happens this week, so maybe some tooling change in 6.9
>>>> but apparently kdoc does not want to know about the tagged struct:
>>>>
>>>> include/net/libeth/rx.h:69: warning: Excess struct member 'fp' description in 'libeth_fq'
>>>
>>> Oh no, maybe we should teach kdoc to parse struct_group*()?
>>
>> scripts/kernel-doc apparently can handle them...
>>
>> + Kees
>>
> 
> Ah, hm, scripts/kernel-doc throws away the early arguments of
> struct_group_tagged, but I suspect it needs to create a synthetic member
> for the tag. i.e. instead of:
> 
> 	struct_group_tagged(tag, name, members...)
> 
> becoming
> 
> 	members...
> 
> it needs to become
> 
> 	struct tag name;
> 	members...
> 
> It seems this is the first place anyone has tried to document the tagged
> struct name! :)

It makes sense and TBH I expected kdoc to warn that an element
description is missing :D

> 
> Does this work? I haven't tested it...
> 
> diff --git a/scripts/kernel-doc b/scripts/kernel-doc
> index 967f1abb0edb..64a19228d5dd 100755
> --- a/scripts/kernel-doc
> +++ b/scripts/kernel-doc
> @@ -1151,7 +1151,8 @@ sub dump_struct($$) {
>          # - first eat non-declaration parameters and rewrite for final match
>          # - then remove macro, outer parens, and trailing semicolon
>          $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
> -        $members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
> +        $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
> +        $members =~ s/\bstruct_group_tagged\s*\(([^,]*,)([^,]*,)/struct $1 $2; STRUCT_GROUP(/gos;

This one does not. We need to exclude ','s from the groups...
      +        $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos;

That one is fine. $members:

include/net/libeth/rx.h:91: warning: struct libeth_fq_fp fp; STRUCT_GROUP( struct page_pool *pp; struct libeth_fqe *fqes; u32 truesize; u32 count; ); enum libeth_fqe_type type:2; bool hsplit:1; bool xdp:1; u32 buf_len; int nid;

So you almost fixed it :D

Which tree should this go through? Should I include this patch in this
series with libeth, or is it better to push it through kees/linux and
then pull to net-next?


>          $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
>          $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;

Thanks,
Olek
Jakub Kicinski April 11, 2024, 12:54 a.m. UTC | #13
On Wed, 10 Apr 2024 15:36:13 +0200 Alexander Lobakin wrote:
> Which tree this should go through? Should I include this patch to this
> series with libeth or it's better to push this through kees/linux and
> then pull to net-next?

I think doc tree is a strong candidate, or at least we should not
merge without consulting Jon. Please post and we'll figure it out.

The question someone may ask, however, is whether it causes new
warnings to appear?
Alexander Lobakin April 11, 2024, 9:07 a.m. UTC | #14
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 10 Apr 2024 17:54:24 -0700

> On Wed, 10 Apr 2024 15:36:13 +0200 Alexander Lobakin wrote:
>> Which tree this should go through? Should I include this patch to this
>> series with libeth or it's better to push this through kees/linux and
>> then pull to net-next?
> 
> I think doc tree is a strong candidate, or at least we should not
> merge without consulting Jon. Please post and we'll figure it out.

Can this series go in simultaneously, or does it need to wait for the fix first?

> 
> The question someone may ask, however, is whether it causes new
> warnings to appear?

I tested `make W=12 KDOCFLAGS=-Wall all` yesterday and didn't notice
any new issues, although I expected some.

Thanks,
Olek
Jakub Kicinski April 11, 2024, 1:45 p.m. UTC | #15
On Thu, 11 Apr 2024 11:07:24 +0200 Alexander Lobakin wrote:
> > I think doc tree is a strong candidate, or at least we should not
> > merge without consulting Jon. Please post and we'll figure it out.  
> 
> Can this series go simultaneously or it needs to wait for the fix first?

You can send both; maybe just mention under the --- that "this one will
generate a known kdoc warning, I'll be fixing the kdoc script separately".

> > The question someone may ask, however, is whether it causes new
> > warnings to appear?  
> 
> I tested `make W=12 KDOCFLAGS=-Wall all` yesterday and haven't noticed
> any new issues, although expected them.

Surprising but nice.

Patch

diff --git a/drivers/net/ethernet/intel/libeth/Kconfig b/drivers/net/ethernet/intel/libeth/Kconfig
index af970a63c227..480293b71dbc 100644
--- a/drivers/net/ethernet/intel/libeth/Kconfig
+++ b/drivers/net/ethernet/intel/libeth/Kconfig
@@ -3,6 +3,7 @@ 
 
 config LIBETH
 	tristate
+	select PAGE_POOL
 	help
 	  libeth is a common library containing routines shared between several
 	  drivers, but not yet promoted to the generic kernel API.
diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h
index aaf9c2cdf7fd..3db2bda4eab6 100644
--- a/include/net/libeth/rx.h
+++ b/include/net/libeth/rx.h
@@ -4,8 +4,125 @@ 
 #ifndef __LIBETH_RX_H
 #define __LIBETH_RX_H
 
+#include <linux/if_vlan.h>
+
+#include <net/page_pool/helpers.h>
 #include <net/xdp.h>
 
+/* Rx buffer management */
+
+/* Space reserved in front of each frame */
+#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
+/* Maximum headroom for worst-case calculations */
+#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
+/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
+#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
+
+/* Always use order-0 pages */
+#define LIBETH_RX_PAGE_ORDER	0
+/* Pick a sane buffer stride and align to a cacheline boundary */
+#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
+/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned */
+#define LIBETH_RX_PAGE_LEN(hr)						  \
+	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		  \
+		   LIBETH_RX_BUF_STRIDE)
+
+/**
+ * struct libeth_fqe - structure representing an Rx buffer
+ * @page: page holding the buffer
+ * @offset: offset from the page start (to the headroom)
+ * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
+ *
+ * Depending on the MTU, API switches between one-page-per-frame and shared
+ * page model (to conserve memory on bigger-page platforms). In case of the
+ * former, @offset is always 0 and @truesize is always ```PAGE_SIZE```.
+ */
+struct libeth_fqe {
+	struct page		*page;
+	u32			offset;
+	u32			truesize;
+} __aligned_largest;
+
+/**
+ * struct libeth_fq - structure representing a buffer queue
+ * @fp: hotpath part of the structure
+ * @pp: &page_pool for buffer management
+ * @fqes: array of Rx buffers
+ * @truesize: size to allocate per buffer, w/overhead
+ * @count: number of descriptors/buffers the queue has
+ * @buf_len: HW-writeable length per each buffer
+ * @nid: ID of the closest NUMA node with memory
+ */
+struct libeth_fq {
+	struct_group_tagged(libeth_fq_fp, fp,
+		struct page_pool	*pp;
+		struct libeth_fqe	*fqes;
+
+		u32			truesize;
+		u32			count;
+	);
+
+	/* Cold fields */
+	u32			buf_len;
+	int			nid;
+};
+
+int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
+void libeth_rx_fq_destroy(struct libeth_fq *fq);
+
+/**
+ * libeth_rx_alloc - allocate a new Rx buffer
+ * @fq: buffer queue to allocate for
+ * @i: index of the buffer within the queue
+ *
+ * Return: DMA address to be passed to HW for Rx on successful allocation,
+ * ```DMA_MAPPING_ERROR``` otherwise.
+ */
+static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
+{
+	struct libeth_fqe *buf = &fq->fqes[i];
+
+	buf->truesize = fq->truesize;
+	buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
+	if (unlikely(!buf->page))
+		return DMA_MAPPING_ERROR;
+
+	return page_pool_get_dma_addr(buf->page) + buf->offset +
+	       fq->pp->p.offset;
+}
+
+void libeth_rx_recycle_slow(struct page *page);
+
+/**
+ * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
+ * @fqe: buffer to process
+ * @len: frame length from the descriptor
+ *
+ * Process the buffer after it's written by HW. The regular path is to
+ * synchronize DMA for CPU, but in case of no data it will be immediately
+ * recycled back to its PP.
+ *
+ * Return: true when there's data to process, false otherwise.
+ */
+static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
+					  u32 len)
+{
+	struct page *page = fqe->page;
+
+	/* Very rare, but possible case. The most common reason:
+	 * the last fragment contained FCS only, which was then
+	 * stripped by the HW.
+	 */
+	if (unlikely(!len)) {
+		libeth_rx_recycle_slow(page);
+		return false;
+	}
+
+	page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
+
+	return true;
+}
+
 /* Converting abstract packet type numbers into a software structure with
  * the packet parameters to do O(1) lookup on Rx.
  */
diff --git a/drivers/net/ethernet/intel/libeth/rx.c b/drivers/net/ethernet/intel/libeth/rx.c
index 86f17e29b47d..a557a1ebcbe5 100644
--- a/drivers/net/ethernet/intel/libeth/rx.c
+++ b/drivers/net/ethernet/intel/libeth/rx.c
@@ -3,6 +3,104 @@ 
 
 #include <net/libeth/rx.h>
 
+/* Rx buffer management */
+
+/**
+ * libeth_rx_hw_len - get the actual buffer size to be passed to HW
+ * @pp: &page_pool_params of the netdev to calculate the size for
+ * @max_len: maximum buffer size for a single descriptor
+ *
+ * Return: HW-writeable length per one buffer to pass it to the HW accounting:
+ * MTU the @dev has, HW required alignment, minimum and maximum allowed values,
+ * and system's page size.
+ */
+static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len)
+{
+	u32 len;
+
+	len = READ_ONCE(pp->netdev->mtu) + LIBETH_RX_LL_LEN;
+	len = ALIGN(len, LIBETH_RX_BUF_STRIDE);
+	len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE),
+		   pp->max_len);
+
+	return len;
+}
+
+/**
+ * libeth_rx_fq_create - create a PP with the default libeth settings
+ * @fq: buffer queue struct to fill
+ * @napi: &napi_struct covering this PP (no usage outside its poll loops)
+ *
+ * Return: %0 on success, -%errno on failure.
+ */
+int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi)
+{
+	struct page_pool_params pp = {
+		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.order		= LIBETH_RX_PAGE_ORDER,
+		.pool_size	= fq->count,
+		.nid		= fq->nid,
+		.dev		= napi->dev->dev.parent,
+		.netdev		= napi->dev,
+		.napi		= napi,
+		.dma_dir	= DMA_FROM_DEVICE,
+		.offset		= LIBETH_SKB_HEADROOM,
+	};
+	struct libeth_fqe *fqes;
+	struct page_pool *pool;
+
+	/* HW-writeable / syncable length per one page */
+	pp.max_len = LIBETH_RX_PAGE_LEN(pp.offset);
+
+	/* HW-writeable length per buffer */
+	fq->buf_len = libeth_rx_hw_len(&pp, fq->buf_len);
+	/* Buffer size to allocate */
+	fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset +
+							 fq->buf_len));
+
+	pool = page_pool_create(&pp);
+	if (IS_ERR(pool))
+		return PTR_ERR(pool);
+
+	fqes = kvcalloc_node(fq->count, sizeof(*fqes), GFP_KERNEL, fq->nid);
+	if (!fqes)
+		goto err_buf;
+
+	fq->fqes = fqes;
+	fq->pp = pool;
+
+	return 0;
+
+err_buf:
+	page_pool_destroy(pool);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_NS_GPL(libeth_rx_fq_create, LIBETH);
+
+/**
+ * libeth_rx_fq_destroy - destroy a &page_pool created by libeth
+ * @fq: buffer queue to process
+ */
+void libeth_rx_fq_destroy(struct libeth_fq *fq)
+{
+	kvfree(fq->fqes);
+	page_pool_destroy(fq->pp);
+}
+EXPORT_SYMBOL_NS_GPL(libeth_rx_fq_destroy, LIBETH);
+
+/**
+ * libeth_rx_recycle_slow - recycle a libeth page from the NAPI context
+ * @page: page to recycle
+ *
+ * To be used on exceptions or rare cases not requiring fast inline recycling.
+ */
+void libeth_rx_recycle_slow(struct page *page)
+{
+	page_pool_recycle_direct(page->pp, page);
+}
+EXPORT_SYMBOL_NS_GPL(libeth_rx_recycle_slow, LIBETH);
+
 /* Converting abstract packet type numbers into a software structure with
  * the packet parameters to do O(1) lookup on Rx.
  */
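
(To show how the new helpers are meant to be used together, below is a
hedged driver-side sketch. Only the libeth_* calls and fields come from
this patch; the ring layout, the descriptor write and drv_build_skb()
are hypothetical placeholders:)

#include <net/libeth/rx.h>

/* Hypothetical driver Rx ring wrapping the libeth fill queue */
struct drv_rx_ring {
	struct libeth_fq	fq;
	__le64			*rx_descs;	/* made-up HW descriptor array */
	u32			next_to_use;
};

static int drv_rxq_init(struct drv_rx_ring *ring, struct napi_struct *napi,
			u32 count)
{
	ring->fq.count = count;
	ring->fq.buf_len = 0;		/* 0 == no per-descriptor length cap */
	ring->fq.nid = NUMA_NO_NODE;

	return libeth_rx_fq_create(&ring->fq, napi);
}

/* Refill up to @num buffers and hand their DMA addresses to the HW */
static u32 drv_rx_refill(struct drv_rx_ring *ring, u32 num)
{
	u32 done;

	for (done = 0; done < num; done++) {
		u32 i = ring->next_to_use;
		dma_addr_t dma;

		dma = libeth_rx_alloc(&ring->fq.fp, i);
		if (dma == DMA_MAPPING_ERROR)
			break;

		/* made-up descriptor format, just to show where @dma goes */
		ring->rx_descs[i] = cpu_to_le64(dma);
		ring->next_to_use = (i + 1) % ring->fq.count;
	}

	return done;
}

/* Process buffer @i after the HW wrote @len bytes into it */
static struct sk_buff *drv_rx_frame(struct drv_rx_ring *ring, u32 i, u32 len)
{
	struct libeth_fqe *fqe = &ring->fq.fqes[i];

	if (!libeth_rx_sync_for_cpu(fqe, len))
		return NULL;	/* empty frag, already recycled to the PP */

	/* build the skb from fqe->page + fqe->offset / fqe->truesize */
	return drv_build_skb(fqe, len);		/* hypothetical helper */
}

static void drv_rxq_deinit(struct drv_rx_ring *ring)
{
	libeth_rx_fq_destroy(&ring->fq);
}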