diff mbox series

[net,1/2] net/mlx5e: XDP, Allow growing tail for XDP multi buffer

Message ID 20230126191050.220610-2-maxtram95@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series xdp_rxq_info_reg fixes for mlx5e | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 24 this patch: 24
netdev/cc_maintainers fail 1 blamed authors not CCed: maximmi@nvidia.com; 8 maintainers not CCed: daniel@iogearbox.net leon@kernel.org ast@kernel.org linux-rdma@vger.kernel.org john.fastabend@gmail.com maximmi@nvidia.com bpf@vger.kernel.org hawk@kernel.org
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 22 this patch: 22
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Maxim Mikityanskiy Jan. 26, 2023, 7:10 p.m. UTC
The cited commits missed passing frag_size to __xdp_rxq_info_reg, which
is required by bpf_xdp_adjust_tail to support growing the tail pointer
in fragmented packets. Pass the missing parameter when the current RQ
mode allows XDP multi buffer.

Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ")
Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP")
Signed-off-by: Maxim Mikityanskiy <maxtram95@gmail.com>
Cc: Tariq Toukan <tariqt@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

Comments

Tariq Toukan Jan. 26, 2023, 8:41 p.m. UTC | #1
On 26/01/2023 21:10, Maxim Mikityanskiy wrote:
> The cited commits missed passing frag_size to __xdp_rxq_info_reg, which
> is required by bpf_xdp_adjust_tail to support growing the tail pointer
> in fragmented packets. Pass the missing parameter when the current RQ
> mode allows XDP multi buffer.
> 
> Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ")
> Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP")
> Signed-off-by: Maxim Mikityanskiy <maxtram95@gmail.com>
> Cc: Tariq Toukan <tariqt@nvidia.com>
> ---
>   drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++---
>   1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> index abcc614b6191..cdd1e47e18f9 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> @@ -576,9 +576,10 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
>   }
>   
>   static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
> -			     struct mlx5e_rq *rq)
> +			     struct mlx5e_rq_param *rq_params, struct mlx5e_rq *rq)
>   {
>   	struct mlx5_core_dev *mdev = c->mdev;
> +	u32 xdp_frag_size = 0;
>   	int err;
>   
>   	rq->wq_type      = params->rq_wq_type;
> @@ -599,7 +600,11 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
>   	if (err)
>   		return err;
>   
> -	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
> +	if (rq->wq_type == MLX5_WQ_TYPE_CYCLIC && rq_params->frags_info.num_frags > 1)

How about a more generic check? like:
if (params->xdp_prog && params->xdp_prog->aux->xdp_has_frags)

So we won't have to maintain this when Stridng RQ support is added.

> +		xdp_frag_size = rq_params->frags_info.arr[1].frag_stride;

Again, in order to not maintain this (frags_info.arr[1].frag_stride not 
relevant for Striding RQ), isn't the value always PAGE_SIZE?

Another idea is to introduce something like
#define XDP_MB_FRAG_SZ (PAGE_SIZE)
use it here and in mlx5e_build_rq_frags_info ::
if (byte_count > max_mtu || params->xdp_prog) {
	frag_size_max = XDP_MB_FRAG_SZ;
Not sure it's worth it...

Both ways we save passing rq_params in the callstack.

> +
> +	return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id,
> +				  xdp_frag_size);
>   }
>   
>   static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
> @@ -2214,7 +2219,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
>   {
>   	int err;
>   
> -	err = mlx5e_init_rxq_rq(c, params, &c->rq);
> +	err = mlx5e_init_rxq_rq(c, params, rq_params, &c->rq);
>   	if (err)
>   		return err;
>
Maxim Mikityanskiy Jan. 26, 2023, 10:43 p.m. UTC | #2
On Thu, Jan 26, 2023 at 10:41:30PM +0200, Tariq Toukan wrote:
> 
> 
> On 26/01/2023 21:10, Maxim Mikityanskiy wrote:
> > The cited commits missed passing frag_size to __xdp_rxq_info_reg, which
> > is required by bpf_xdp_adjust_tail to support growing the tail pointer
> > in fragmented packets. Pass the missing parameter when the current RQ
> > mode allows XDP multi buffer.
> > 
> > Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ")
> > Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP")
> > Signed-off-by: Maxim Mikityanskiy <maxtram95@gmail.com>
> > Cc: Tariq Toukan <tariqt@nvidia.com>
> > ---
> >   drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++---
> >   1 file changed, 8 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > index abcc614b6191..cdd1e47e18f9 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
> > @@ -576,9 +576,10 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
> >   }
> >   static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
> > -			     struct mlx5e_rq *rq)
> > +			     struct mlx5e_rq_param *rq_params, struct mlx5e_rq *rq)
> >   {
> >   	struct mlx5_core_dev *mdev = c->mdev;
> > +	u32 xdp_frag_size = 0;
> >   	int err;
> >   	rq->wq_type      = params->rq_wq_type;
> > @@ -599,7 +600,11 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
> >   	if (err)
> >   		return err;
> > -	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
> > +	if (rq->wq_type == MLX5_WQ_TYPE_CYCLIC && rq_params->frags_info.num_frags > 1)
> 
> How about a more generic check? like:
> if (params->xdp_prog && params->xdp_prog->aux->xdp_has_frags)
> 
> So we won't have to maintain this when Stridng RQ support is added.

The check is specific, because below I use rq_params->frags_info, which
is specific to legacy RQ. If we change the input for xdp_frag_size, the
check can also be changed, but the condition that you suggested can't be
used anyway, because the XDP program can be hot-swapped without
recreating channels (i.e. without calling into mlx5e_init_rxq_rq), and
xdp_has_frags can change after the hot-swap.

It's actually valid to pass a non-zero value unconditionally, it just
won't be used if the driver doesn't pass any multi-buffer frames to XDP.
I added a reasonable condition solely for extra robustness, but we can
drop the `if` altogether if we don't agree on the condition.

> > +		xdp_frag_size = rq_params->frags_info.arr[1].frag_stride;
> 
> Again, in order to not maintain this (frags_info.arr[1].frag_stride not
> relevant for Striding RQ), isn't the value always PAGE_SIZE?

It's always PAGE_SIZE for the current implementation of legacy RQ, but
the kernel doesn't fix it to PAGE_SIZE, it's possible for a driver to
choose a different memory allocation scheme with fragments of another
size, that's why this parameter exists.

Setting it to PAGE_SIZE to be "future-proof" may be problematic: if
striding RQ uses a different frag_size, and the author forgets to update
this code, it may lead to a memory corruption on adjust_tail.

There is an obvious robustness problem with this place in code: it's
easy to forget about updating it. I forgot to set the right non-zero
value when I added XDP multi buffer, the next developer risks forgetting
updating this code when XDP multi buffer support is extended to striding
RQ, or the memory allocation scheme is somehow changed. So, it's not
possible to avoid maintaining it: either way it might need changes in
the future. I wanted to add some WARN_ON or BUILD_BUG_ON to simplify
such maintenance, but couldn't think of a good check...

> 
> Another idea is to introduce something like
> #define XDP_MB_FRAG_SZ (PAGE_SIZE)
> use it here and in mlx5e_build_rq_frags_info ::
> if (byte_count > max_mtu || params->xdp_prog) {
> 	frag_size_max = XDP_MB_FRAG_SZ;
> Not sure it's worth it...

IMO, it doesn't fit to mlx5e_build_rq_frags_info, because that function
heavily relies on its value being PAGE_SIZE, and hiding it under a
different name may give false impression that it can be changed.
Moreover, there is a chance that striding RQ will use a different value
for XDP frag_size. Also, it rather doesn't make sense even in the code
that you quoted: if byte_count > max_mtu, using XDP_MB_FRAG_SZ doesn't
make sense.

Using this constant only here, but not in mlx5e_build_rq_frags_info
doesn't make sense either, because it won't help remind developers to
update this part of code.

I think I got a better idea: move the logic to en/params.c, it knows
everything about the memory allocation scheme, about the XDP multi
buffer support, so let it calculate the right value and assign it to
some field (let's say, rq_params->xdp_frag_size), which is passed to
mlx5e_init_rxq_rq and used here as is. mlx5e_init_rxq_rq won't need to
dig into implementation details of each mode, instead the functions that
contain these details will calculate the value for XDP. What do you
think?

> Both ways we save passing rq_params in the callstack.

I don't think the number of parameters is crucial for non-datapath,
especially given that it's still fewer than 6.

> 
> > +
> > +	return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id,
> > +				  xdp_frag_size);
> >   }
> >   static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
> > @@ -2214,7 +2219,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
> >   {
> >   	int err;
> > -	err = mlx5e_init_rxq_rq(c, params, &c->rq);
> > +	err = mlx5e_init_rxq_rq(c, params, rq_params, &c->rq);
> >   	if (err)
> >   		return err;
Tariq Toukan Jan. 29, 2023, 10:04 a.m. UTC | #3
On 27/01/2023 0:43, Maxim Mikityanskiy wrote:
> On Thu, Jan 26, 2023 at 10:41:30PM +0200, Tariq Toukan wrote:
>>
>>
>> On 26/01/2023 21:10, Maxim Mikityanskiy wrote:
>>> The cited commits missed passing frag_size to __xdp_rxq_info_reg, which
>>> is required by bpf_xdp_adjust_tail to support growing the tail pointer
>>> in fragmented packets. Pass the missing parameter when the current RQ
>>> mode allows XDP multi buffer.
>>>
>>> Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ")
>>> Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP")
>>> Signed-off-by: Maxim Mikityanskiy <maxtram95@gmail.com>
>>> Cc: Tariq Toukan <tariqt@nvidia.com>
>>> ---
>>>    drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++---
>>>    1 file changed, 8 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>>> index abcc614b6191..cdd1e47e18f9 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>>> @@ -576,9 +576,10 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
>>>    }
>>>    static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
>>> -			     struct mlx5e_rq *rq)
>>> +			     struct mlx5e_rq_param *rq_params, struct mlx5e_rq *rq)
>>>    {
>>>    	struct mlx5_core_dev *mdev = c->mdev;
>>> +	u32 xdp_frag_size = 0;
>>>    	int err;
>>>    	rq->wq_type      = params->rq_wq_type;
>>> @@ -599,7 +600,11 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
>>>    	if (err)
>>>    		return err;
>>> -	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
>>> +	if (rq->wq_type == MLX5_WQ_TYPE_CYCLIC && rq_params->frags_info.num_frags > 1)
>>
>> How about a more generic check? like:
>> if (params->xdp_prog && params->xdp_prog->aux->xdp_has_frags)
>>
>> So we won't have to maintain this when Stridng RQ support is added.
> 
> The check is specific, because below I use rq_params->frags_info, which
> is specific to legacy RQ. If we change the input for xdp_frag_size, the
> check can also be changed, but the condition that you suggested can't be
> used anyway, because the XDP program can be hot-swapped without
> recreating channels (i.e. without calling into mlx5e_init_rxq_rq), and
> xdp_has_frags can change after the hot-swap.
> 
> It's actually valid to pass a non-zero value unconditionally, it just
> won't be used if the driver doesn't pass any multi-buffer frames to XDP.
> I added a reasonable condition solely for extra robustness, but we can
> drop the `if` altogether if we don't agree on the condition.
> 
>>> +		xdp_frag_size = rq_params->frags_info.arr[1].frag_stride;
>>
>> Again, in order to not maintain this (frags_info.arr[1].frag_stride not
>> relevant for Striding RQ), isn't the value always PAGE_SIZE?
> 
> It's always PAGE_SIZE for the current implementation of legacy RQ, but
> the kernel doesn't fix it to PAGE_SIZE, it's possible for a driver to
> choose a different memory allocation scheme with fragments of another
> size, that's why this parameter exists.
> 
> Setting it to PAGE_SIZE to be "future-proof" may be problematic: if
> striding RQ uses a different frag_size, and the author forgets to update
> this code, it may lead to a memory corruption on adjust_tail.
> 
> There is an obvious robustness problem with this place in code: it's
> easy to forget about updating it. I forgot to set the right non-zero
> value when I added XDP multi buffer, the next developer risks forgetting
> updating this code when XDP multi buffer support is extended to striding
> RQ, or the memory allocation scheme is somehow changed. So, it's not
> possible to avoid maintaining it: either way it might need changes in
> the future. I wanted to add some WARN_ON or BUILD_BUG_ON to simplify
> such maintenance, but couldn't think of a good check...
> 
>>
>> Another idea is to introduce something like
>> #define XDP_MB_FRAG_SZ (PAGE_SIZE)
>> use it here and in mlx5e_build_rq_frags_info ::
>> if (byte_count > max_mtu || params->xdp_prog) {
>> 	frag_size_max = XDP_MB_FRAG_SZ;
>> Not sure it's worth it...
> 
> IMO, it doesn't fit to mlx5e_build_rq_frags_info, because that function
> heavily relies on its value being PAGE_SIZE, and hiding it under a
> different name may give false impression that it can be changed.
> Moreover, there is a chance that striding RQ will use a different value
> for XDP frag_size. Also, it rather doesn't make sense even in the code
> that you quoted: if byte_count > max_mtu, using XDP_MB_FRAG_SZ doesn't
> make sense.
> 
> Using this constant only here, but not in mlx5e_build_rq_frags_info
> doesn't make sense either, because it won't help remind developers to
> update this part of code.
> 

Agree.

> I think I got a better idea: move the logic to en/params.c, it knows
> everything about the memory allocation scheme, about the XDP multi
> buffer support, so let it calculate the right value and assign it to
> some field (let's say, rq_params->xdp_frag_size), which is passed to
> mlx5e_init_rxq_rq and used here as is. mlx5e_init_rxq_rq won't need to
> dig into implementation details of each mode, instead the functions that
> contain these details will calculate the value for XDP. What do you
> think?
> 

Yes, that would be best.

>> Both ways we save passing rq_params in the callstack.
> 
> I don't think the number of parameters is crucial for non-datapath,
> especially given that it's still fewer than 6.
> 
>>
>>> +
>>> +	return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id,
>>> +				  xdp_frag_size);
>>>    }
>>>    static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
>>> @@ -2214,7 +2219,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
>>>    {
>>>    	int err;
>>> -	err = mlx5e_init_rxq_rq(c, params, &c->rq);
>>> +	err = mlx5e_init_rxq_rq(c, params, rq_params, &c->rq);
>>>    	if (err)
>>>    		return err;
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index abcc614b6191..cdd1e47e18f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -576,9 +576,10 @@  static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
 }
 
 static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
-			     struct mlx5e_rq *rq)
+			     struct mlx5e_rq_param *rq_params, struct mlx5e_rq *rq)
 {
 	struct mlx5_core_dev *mdev = c->mdev;
+	u32 xdp_frag_size = 0;
 	int err;
 
 	rq->wq_type      = params->rq_wq_type;
@@ -599,7 +600,11 @@  static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
 	if (err)
 		return err;
 
-	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
+	if (rq->wq_type == MLX5_WQ_TYPE_CYCLIC && rq_params->frags_info.num_frags > 1)
+		xdp_frag_size = rq_params->frags_info.arr[1].frag_stride;
+
+	return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id,
+				  xdp_frag_size);
 }
 
 static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
@@ -2214,7 +2219,7 @@  static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param
 {
 	int err;
 
-	err = mlx5e_init_rxq_rq(c, params, &c->rq);
+	err = mlx5e_init_rxq_rq(c, params, rq_params, &c->rq);
 	if (err)
 		return err;