
[net] gro: fix napi_gro_frags() Fast GRO breakage due to IP alignment check

Message ID 20210418114200.5839-1-alobakin@pm.me (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series [net] gro: fix napi_gro_frags() Fast GRO breakage due to IP alignment check

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 12 of 12 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 10 this patch: 10
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 32 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/header_inline success Link

Commit Message

Alexander Lobakin April 18, 2021, 11:42 a.m. UTC
Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
did the right thing, but missed the fact that napi_gro_frags() calls
skb_gro_reset_offset() *before* pulling the Ethernet header into the
skb linear space.
As a result, the introduced check for the frag0 address being aligned
to 4 always fails on that path: frag0 still starts at the Ethernet
header, which is 14 bytes long, and with NET_IP_ALIGN its start is
deliberately not aligned to 4 (so that the IP header following it is).

Fix this by adding an @nhoff argument to skb_gro_reset_offset() which
tells whether the IP header is placed right at the start of frag0 or
not. This restores Fast GRO for napi_gro_frags(), which became very
slow after the mentioned commit, and preserves the introduced check
to avoid silent unaligned accesses.

Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

--
2.31.1
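
For illustration, here is the arithmetic behind the fix as a standalone
user-space sketch (the helper and the constant values below are
hypothetical stand-ins, not kernel code; NET_IP_ALIGN == 2 and a plain
14-byte Ethernet header are assumed):

#include <stdio.h>

/* Hypothetical stand-ins for the kernel constants involved. */
#define NET_IP_ALIGN	2	/* typical value on arches wanting aligned IP headers */
#define ETH_HLEN	14	/* Ethernet header length */

/* Mirrors the condition in skb_gro_reset_offset(): frag0 may only be
 * used for Fast GRO when the data at offset @nhoff is 4-byte aligned.
 */
static int frag0_usable(unsigned int frag_off, unsigned int nhoff)
{
	return !NET_IP_ALIGN || !((frag_off + nhoff) & 3);
}

int main(void)
{
	/* napi_gro_frags(): frag0 starts at the Ethernet header, which the
	 * driver placed NET_IP_ALIGN bytes into the page fragment.
	 */
	unsigned int frag_off = NET_IP_ALIGN;

	/* Old check (nhoff implicitly 0): 2 & 3 != 0, Fast GRO disabled. */
	printf("nhoff = 0:  %s\n", frag0_usable(frag_off, 0) ? "fast" : "slow");

	/* New check (nhoff == ETH_HLEN): (2 + 14) & 3 == 0, Fast GRO enabled. */
	printf("nhoff = 14: %s\n", frag0_usable(frag_off, ETH_HLEN) ? "fast" : "slow");

	return 0;
}

For napi_gro_receive(), nhoff = 0 is passed, so the check on that path
behaves exactly as before the change.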

Comments

Eric Dumazet April 19, 2021, 11:05 a.m. UTC | #1
On Sun, Apr 18, 2021 at 1:43 PM Alexander Lobakin <alobakin@pm.me> wrote:
>
> Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
> did the right thing, but missed the fact that napi_gro_frags() calls
> skb_gro_reset_offset() *before* pulling the Ethernet header into the
> skb linear space.
> As a result, the introduced check for the frag0 address being aligned
> to 4 always fails on that path: frag0 still starts at the Ethernet
> header, which is 14 bytes long, and with NET_IP_ALIGN its start is
> deliberately not aligned to 4 (so that the IP header following it is).
>
> Fix this by adding an @nhoff argument to skb_gro_reset_offset() which
> tells whether the IP header is placed right at the start of frag0 or
> not. This restores Fast GRO for napi_gro_frags(), which became very
> slow after the mentioned commit, and preserves the introduced check
> to avoid silent unaligned accesses.
>
> Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
> Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> ---
>  net/core/dev.c | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 1f79b9aa9a3f..965d5f9b6fee 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
>         return head;
>  }
>
> -static void skb_gro_reset_offset(struct sk_buff *skb)
> +static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
>  {
>         const struct skb_shared_info *pinfo = skb_shinfo(skb);
>         const skb_frag_t *frag0 = &pinfo->frags[0];
> @@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
>
>         if (!skb_headlen(skb) && pinfo->nr_frags &&
>             !PageHighMem(skb_frag_page(frag0)) &&
> -           (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
> +           (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
>                 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
>                 NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
>                                                     skb_frag_size(frag0),
> @@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>         skb_mark_napi_id(skb, napi);
>         trace_napi_gro_receive_entry(skb);
>
> -       skb_gro_reset_offset(skb);
> +       skb_gro_reset_offset(skb, 0);
>
>         ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
>         trace_napi_gro_receive_exit(ret);
> @@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
>         napi->skb = NULL;
>
>         skb_reset_mac_header(skb);
> -       skb_gro_reset_offset(skb);
> +       skb_gro_reset_offset(skb, hlen);
>
>         if (unlikely(skb_gro_header_hard(skb, hlen))) {
>                 eth = skb_gro_header_slow(skb, hlen, 0);
> --
> 2.31.1
>
>

Good catch, thanks.

This has the unfortunate effect of increasing code length on x86;
maybe we should make an exception to the normal rules so that
skb_gro_reset_offset() is inlined.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Alexander Lobakin April 19, 2021, 12:37 p.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 19 Apr 2021 13:05:16 +0200

> On Sun, Apr 18, 2021 at 1:43 PM Alexander Lobakin <alobakin@pm.me> wrote:
> >
> > Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
> > did the right thing, but missed the fact that napi_gro_frags() calls
> > skb_gro_reset_offset() *before* pulling the Ethernet header into the
> > skb linear space.
> > As a result, the introduced check for the frag0 address being aligned
> > to 4 always fails on that path: frag0 still starts at the Ethernet
> > header, which is 14 bytes long, and with NET_IP_ALIGN its start is
> > deliberately not aligned to 4 (so that the IP header following it is).
> >
> > Fix this by adding an @nhoff argument to skb_gro_reset_offset() which
> > tells whether the IP header is placed right at the start of frag0 or
> > not. This restores Fast GRO for napi_gro_frags(), which became very
> > slow after the mentioned commit, and preserves the introduced check
> > to avoid silent unaligned accesses.
> >
> > Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
> > Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> > ---
> >  net/core/dev.c | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 1f79b9aa9a3f..965d5f9b6fee 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
> >         return head;
> >  }
> >
> > -static void skb_gro_reset_offset(struct sk_buff *skb)
> > +static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
> >  {
> >         const struct skb_shared_info *pinfo = skb_shinfo(skb);
> >         const skb_frag_t *frag0 = &pinfo->frags[0];
> > @@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
> >
> >         if (!skb_headlen(skb) && pinfo->nr_frags &&
> >             !PageHighMem(skb_frag_page(frag0)) &&
> > -           (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
> > +           (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
> >                 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
> >                 NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
> >                                                     skb_frag_size(frag0),
> > @@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
> >         skb_mark_napi_id(skb, napi);
> >         trace_napi_gro_receive_entry(skb);
> >
> > -       skb_gro_reset_offset(skb);
> > +       skb_gro_reset_offset(skb, 0);
> >
> >         ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
> >         trace_napi_gro_receive_exit(ret);
> > @@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
> >         napi->skb = NULL;
> >
> >         skb_reset_mac_header(skb);
> > -       skb_gro_reset_offset(skb);
> > +       skb_gro_reset_offset(skb, hlen);
> >
> >         if (unlikely(skb_gro_header_hard(skb, hlen))) {
> >                 eth = skb_gro_header_slow(skb, hlen, 0);
> > --
> > 2.31.1
> >
> >
>
> Good catch, thanks.
>
> This has the unfortunate effect of increasing code length on x86;
> maybe we should make an exception to the normal rules so that
> skb_gro_reset_offset() is inlined.

Agreed. To mitigate this code growth, we can either go with
NET_IP_ALIGN ifdeffery or just inline skb_gro_reset_offset(). The
function itself is tiny, so I don't see a reason not to do this.
Will post v2 in a moment.

> Reviewed-by: Eric Dumazet <edumazet@google.com>

Thanks,
Al
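
For reference, a minimal sketch of the inlining option discussed above
(an assumption about what a follow-up could look like, not the actual
v2; the function body would stay exactly as in the patch below):

/* Hypothetical: __always_inline asks the compiler to fold this small
 * hot-path helper into its two callers, so that the extra @nhoff
 * argument does not grow the generated code on x86.
 */
static __always_inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
{
	/* ... body unchanged from the patch ... */
}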

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index 1f79b9aa9a3f..965d5f9b6fee 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5914,7 +5914,7 @@  static struct list_head *gro_list_prepare(struct napi_struct *napi,
 	return head;
 }

-static void skb_gro_reset_offset(struct sk_buff *skb)
+static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
 {
 	const struct skb_shared_info *pinfo = skb_shinfo(skb);
 	const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5925,7 +5925,7 @@  static void skb_gro_reset_offset(struct sk_buff *skb)

 	if (!skb_headlen(skb) && pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0)) &&
-	    (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+	    (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
 		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
 						    skb_frag_size(frag0),
@@ -6143,7 +6143,7 @@  gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);

-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, 0);

 	ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
 	trace_napi_gro_receive_exit(ret);
@@ -6232,7 +6232,7 @@  static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 	napi->skb = NULL;

 	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, hlen);

 	if (unlikely(skb_gro_header_hard(skb, hlen))) {
 		eth = skb_gro_header_slow(skb, hlen, 0);