diff mbox series

[net-next,3/3] net: create and use NAPI version of tc_skb_ext_alloc()

Message ID 20230215034355.481925-4-kuba@kernel.org (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: skbuff: cache one skb_ext for use by GRO | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 62 this patch: 62
netdev/cc_maintainers warning 1 maintainers not CCed: linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 4 this patch: 4
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 80 this patch: 80
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 31 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Kicinski Feb. 15, 2023, 3:43 a.m. UTC
Try to use the cached skb_ext in the drivers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
CC: saeedm@nvidia.com
CC: leon@kernel.org
CC: jhs@mojatatu.com
CC: xiyou.wangcong@gmail.com
CC: jiri@resnulli.us
CC: roid@nvidia.com
CC: ozsh@nvidia.com
CC: paulb@nvidia.com
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     | 2 +-
 include/net/pkt_cls.h                               | 9 +++++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

Comments

Jamal Hadi Salim Feb. 15, 2023, 4:50 p.m. UTC | #1
On Tue, Feb 14, 2023 at 10:44 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Try to use the cached skb_ext in the drivers.
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> CC: saeedm@nvidia.com
> CC: leon@kernel.org
> CC: jhs@mojatatu.com
> CC: xiyou.wangcong@gmail.com
> CC: jiri@resnulli.us
> CC: roid@nvidia.com
> CC: ozsh@nvidia.com
> CC: paulb@nvidia.com
> ---
>  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +-
>  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     | 2 +-
>  include/net/pkt_cls.h                               | 9 +++++++++
>  3 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> index 3b590cfe33b8..ffbed5a92eab 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> @@ -770,7 +770,7 @@ static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
>                 struct mlx5_eswitch *esw;
>                 u32 zone_restore_id;
>
> -               tc_skb_ext = tc_skb_ext_alloc(skb);
> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
>                 if (!tc_skb_ext) {
>                         WARN_ON(1);
>                         return false;
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> index 2d06b4412762..3d9da4ccaf5d 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> @@ -5643,7 +5643,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
>
>         if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
>                 chain = mapped_obj.chain;
> -               tc_skb_ext = tc_skb_ext_alloc(skb);
> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
>                 if (WARN_ON(!tc_skb_ext))
>                         return false;
>
> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> index ace437c6754b..82821a3f8a8b 100644
> --- a/include/net/pkt_cls.h
> +++ b/include/net/pkt_cls.h
> @@ -764,6 +764,15 @@ static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
>                 memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
>         return tc_skb_ext;
>  }
> +
> +static inline struct tc_skb_ext *tc_skb_ext_alloc_napi(struct sk_buff *skb)
> +{
> +       struct tc_skb_ext *tc_skb_ext = napi_skb_ext_add(skb, TC_SKB_EXT);
> +
> +       if (tc_skb_ext)
> +               memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
> +       return tc_skb_ext;
> +}
>  #endif
>

Dumb question: Would this work with a consumer of the metadata past
RPS stage? didnt look closely but assumed per cpu skb_ext because of
the napi context - which will require a per cpu pointer to fetch it
later.
In P4TC we make heavy use of skb_ext and found it to be very pricey,
so we ended making it per cpu - but that's post RPS so we are safe.

cheers,
jamal
Jiri Pirko Feb. 15, 2023, 5:03 p.m. UTC | #2
Wed, Feb 15, 2023 at 05:50:55PM CET, jhs@mojatatu.com wrote:
>On Tue, Feb 14, 2023 at 10:44 PM Jakub Kicinski <kuba@kernel.org> wrote:
>>
>> Try to use the cached skb_ext in the drivers.
>>
>> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
>> ---
>> CC: saeedm@nvidia.com
>> CC: leon@kernel.org
>> CC: jhs@mojatatu.com
>> CC: xiyou.wangcong@gmail.com
>> CC: jiri@resnulli.us
>> CC: roid@nvidia.com
>> CC: ozsh@nvidia.com
>> CC: paulb@nvidia.com
>> ---
>>  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +-
>>  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     | 2 +-
>>  include/net/pkt_cls.h                               | 9 +++++++++
>>  3 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
>> index 3b590cfe33b8..ffbed5a92eab 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
>> @@ -770,7 +770,7 @@ static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
>>                 struct mlx5_eswitch *esw;
>>                 u32 zone_restore_id;
>>
>> -               tc_skb_ext = tc_skb_ext_alloc(skb);
>> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
>>                 if (!tc_skb_ext) {
>>                         WARN_ON(1);
>>                         return false;
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
>> index 2d06b4412762..3d9da4ccaf5d 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
>> @@ -5643,7 +5643,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
>>
>>         if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
>>                 chain = mapped_obj.chain;
>> -               tc_skb_ext = tc_skb_ext_alloc(skb);
>> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
>>                 if (WARN_ON(!tc_skb_ext))
>>                         return false;
>>
>> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
>> index ace437c6754b..82821a3f8a8b 100644
>> --- a/include/net/pkt_cls.h
>> +++ b/include/net/pkt_cls.h
>> @@ -764,6 +764,15 @@ static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
>>                 memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
>>         return tc_skb_ext;
>>  }
>> +
>> +static inline struct tc_skb_ext *tc_skb_ext_alloc_napi(struct sk_buff *skb)
>> +{
>> +       struct tc_skb_ext *tc_skb_ext = napi_skb_ext_add(skb, TC_SKB_EXT);
>> +
>> +       if (tc_skb_ext)
>> +               memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
>> +       return tc_skb_ext;
>> +}
>>  #endif
>>
>
>Dumb question: Would this work with a consumer of the metadata past
>RPS stage? didnt look closely but assumed per cpu skb_ext because of
>the napi context - which will require a per cpu pointer to fetch it
>later.
>In P4TC we make heavy use of skb_ext and found it to be very pricey,

Why don't you use skb->cb internally in p4tc? Or does the skb leave p4tc
and arrive back again? When?


>so we ended making it per cpu - but that's post RPS so we are safe.
>
>cheers,
>jamal
Jakub Kicinski Feb. 15, 2023, 5:35 p.m. UTC | #3
On Wed, 15 Feb 2023 11:50:55 -0500 Jamal Hadi Salim wrote:
> Dumb question: Would this work with a consumer of the metadata past
> RPS stage? didnt look closely but assumed per cpu skb_ext because of
> the napi context - which will require a per cpu pointer to fetch it
> later.

The cache is just for allocation, specifically for the case where
driver allocates the skb and it's quickly coalesced by GRO.
The lifetime of the allocated object is not changed.
Jamal Hadi Salim Feb. 15, 2023, 6:36 p.m. UTC | #4
On Wed, Feb 15, 2023 at 12:03 PM Jiri Pirko <jiri@resnulli.us> wrote:
>
> Wed, Feb 15, 2023 at 05:50:55PM CET, jhs@mojatatu.com wrote:
> >On Tue, Feb 14, 2023 at 10:44 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >>
> >> Try to use the cached skb_ext in the drivers.
> >>
> >> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> >> ---
> >> CC: saeedm@nvidia.com
> >> CC: leon@kernel.org
> >> CC: jhs@mojatatu.com
> >> CC: xiyou.wangcong@gmail.com
> >> CC: jiri@resnulli.us
> >> CC: roid@nvidia.com
> >> CC: ozsh@nvidia.com
> >> CC: paulb@nvidia.com
> >> ---
> >>  drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +-
> >>  drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     | 2 +-
> >>  include/net/pkt_cls.h                               | 9 +++++++++
> >>  3 files changed, 11 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> >> index 3b590cfe33b8..ffbed5a92eab 100644
> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
> >> @@ -770,7 +770,7 @@ static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
> >>                 struct mlx5_eswitch *esw;
> >>                 u32 zone_restore_id;
> >>
> >> -               tc_skb_ext = tc_skb_ext_alloc(skb);
> >> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
> >>                 if (!tc_skb_ext) {
> >>                         WARN_ON(1);
> >>                         return false;
> >> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> >> index 2d06b4412762..3d9da4ccaf5d 100644
> >> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> >> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
> >> @@ -5643,7 +5643,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
> >>
> >>         if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
> >>                 chain = mapped_obj.chain;
> >> -               tc_skb_ext = tc_skb_ext_alloc(skb);
> >> +               tc_skb_ext = tc_skb_ext_alloc_napi(skb);
> >>                 if (WARN_ON(!tc_skb_ext))
> >>                         return false;
> >>
> >> diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
> >> index ace437c6754b..82821a3f8a8b 100644
> >> --- a/include/net/pkt_cls.h
> >> +++ b/include/net/pkt_cls.h
> >> @@ -764,6 +764,15 @@ static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
> >>                 memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
> >>         return tc_skb_ext;
> >>  }
> >> +
> >> +static inline struct tc_skb_ext *tc_skb_ext_alloc_napi(struct sk_buff *skb)
> >> +{
> >> +       struct tc_skb_ext *tc_skb_ext = napi_skb_ext_add(skb, TC_SKB_EXT);
> >> +
> >> +       if (tc_skb_ext)
> >> +               memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
> >> +       return tc_skb_ext;
> >> +}
> >>  #endif
> >>
> >
> >Dumb question: Would this work with a consumer of the metadata past
> >RPS stage? didnt look closely but assumed per cpu skb_ext because of
> >the napi context - which will require a per cpu pointer to fetch it
> >later.
> >In P4TC we make heavy use of skb_ext and found it to be very pricey,
>
> Why don't you use skb->cb internally in p4tc?

Just for space reasons since we have a lot of stuff we carry around
(headers, metadata, and table keys which could be each upto an imposed
512b in size).

> Or does the skb leave p4tc and arrive back again? When?

Only time it leaves is if we do a "recirculate" - which is essentially
a mirred redirect to ingress/egress of another port.
It still causes memory pressure in alloc/free - we are thinking of
getting rid of it altogether and replacing it with a statically
allocated per-cpu scratchpad; if it gets recirculated we start from
scratch.

cheers,
jamal
Jamal Hadi Salim Feb. 15, 2023, 6:38 p.m. UTC | #5
On Wed, Feb 15, 2023 at 12:35 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 15 Feb 2023 11:50:55 -0500 Jamal Hadi Salim wrote:
> > Dumb question: Would this work with a consumer of the metadata past
> > RPS stage? didnt look closely but assumed per cpu skb_ext because of
> > the napi context - which will require a per cpu pointer to fetch it
> > later.
>
> The cache is just for allocation, specifically for the case where
> driver allocates the skb and it's quickly coalesced by GRO.
> The lifetime of the allocated object is not changed.

ah. That certainly works.

cheers,
jamal
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 3b590cfe33b8..ffbed5a92eab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -770,7 +770,7 @@  static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
 		struct mlx5_eswitch *esw;
 		u32 zone_restore_id;
 
-		tc_skb_ext = tc_skb_ext_alloc(skb);
+		tc_skb_ext = tc_skb_ext_alloc_napi(skb);
 		if (!tc_skb_ext) {
 			WARN_ON(1);
 			return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2d06b4412762..3d9da4ccaf5d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5643,7 +5643,7 @@  bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
 		chain = mapped_obj.chain;
-		tc_skb_ext = tc_skb_ext_alloc(skb);
+		tc_skb_ext = tc_skb_ext_alloc_napi(skb);
 		if (WARN_ON(!tc_skb_ext))
 			return false;
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index ace437c6754b..82821a3f8a8b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -764,6 +764,15 @@  static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
 		memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
 	return tc_skb_ext;
 }
+
+static inline struct tc_skb_ext *tc_skb_ext_alloc_napi(struct sk_buff *skb)
+{
+	struct tc_skb_ext *tc_skb_ext = napi_skb_ext_add(skb, TC_SKB_EXT);
+
+	if (tc_skb_ext)
+		memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
+	return tc_skb_ext;
+}
 #endif
 
 enum tc_matchall_command {