diff mbox series

[v2] skbuff: skb_segment, Call zero copy functions before using skbuff frags

Message ID 20230830232811.9876-1-mkhalfella@purestorage.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [v2] skbuff: skb_segment, Call zero copy functions before using skbuff frags | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1332 this patch: 1332
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1353 this patch: 1353
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1355 this patch: 1355
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 87 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Mohamed Khalfella Aug. 30, 2023, 11:28 p.m. UTC
Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
once per nskb") added the call to zero copy functions in skb_segment().
The change introduced a bug in skb_segment() because skb_orphan_frags()
may possibly change the number of fragments or allocate new fragments
altogether leaving nrfrags and frag to point to the old values. This can
cause a panic with stacktrace like the one below.

[  193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
[  193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G           O      5.15.123+ #26
[  193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
[  194.021892] Call Trace:
[  194.027422]  <TASK>
[  194.072861]  tcp_gso_segment+0x107/0x540
[  194.082031]  inet_gso_segment+0x15c/0x3d0
[  194.090783]  skb_mac_gso_segment+0x9f/0x110
[  194.095016]  __skb_gso_segment+0xc1/0x190
[  194.103131]  netem_enqueue+0x290/0xb10 [sch_netem]
[  194.107071]  dev_qdisc_enqueue+0x16/0x70
[  194.110884]  __dev_queue_xmit+0x63b/0xb30
[  194.121670]  bond_start_xmit+0x159/0x380 [bonding]
[  194.128506]  dev_hard_start_xmit+0xc3/0x1e0
[  194.131787]  __dev_queue_xmit+0x8a0/0xb30
[  194.138225]  macvlan_start_xmit+0x4f/0x100 [macvlan]
[  194.141477]  dev_hard_start_xmit+0xc3/0x1e0
[  194.144622]  sch_direct_xmit+0xe3/0x280
[  194.147748]  __dev_queue_xmit+0x54a/0xb30
[  194.154131]  tap_get_user+0x2a8/0x9c0 [tap]
[  194.157358]  tap_sendmsg+0x52/0x8e0 [tap]
[  194.167049]  handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
[  194.173631]  handle_tx+0xcd/0xe0 [vhost_net]
[  194.176959]  vhost_worker+0x76/0xb0 [vhost]
[  194.183667]  kthread+0x118/0x140
[  194.190358]  ret_from_fork+0x1f/0x30
[  194.193670]  </TASK>

In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
local variable in skb_segment() stale. This resulted in the code hitting
i >= nrfrags prematurely and trying to move to next frag_skb using
list_skb pointer, which was NULL, and caused kernel panic. Move the call
to zero copy functions before using frags and nr_frags.

Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
Reported-by: Amit Goyal <agoyal@purestorage.com>
Cc: stable@vger.kernel.org
---
 net/core/skbuff.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

Comments

Eric Dumazet Aug. 31, 2023, 6:58 a.m. UTC | #1
On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
<mkhalfella@purestorage.com> wrote:
>
> Commit bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions
> once per nskb") added the call to zero copy functions in skb_segment().
> The change introduced a bug in skb_segment() because skb_orphan_frags()
> may possibly change the number of fragments or allocate new fragments
> altogether leaving nrfrags and frag to point to the old values. This can
> cause a panic with stacktrace like the one below.
>
> [  193.894380] BUG: kernel NULL pointer dereference, address: 00000000000000bc
> [  193.895273] CPU: 13 PID: 18164 Comm: vh-net-17428 Kdump: loaded Tainted: G           O      5.15.123+ #26
> [  193.903919] RIP: 0010:skb_segment+0xb0e/0x12f0
> [  194.021892] Call Trace:
> [  194.027422]  <TASK>
> [  194.072861]  tcp_gso_segment+0x107/0x540
> [  194.082031]  inet_gso_segment+0x15c/0x3d0
> [  194.090783]  skb_mac_gso_segment+0x9f/0x110
> [  194.095016]  __skb_gso_segment+0xc1/0x190
> [  194.103131]  netem_enqueue+0x290/0xb10 [sch_netem]
> [  194.107071]  dev_qdisc_enqueue+0x16/0x70
> [  194.110884]  __dev_queue_xmit+0x63b/0xb30
> [  194.121670]  bond_start_xmit+0x159/0x380 [bonding]
> [  194.128506]  dev_hard_start_xmit+0xc3/0x1e0
> [  194.131787]  __dev_queue_xmit+0x8a0/0xb30
> [  194.138225]  macvlan_start_xmit+0x4f/0x100 [macvlan]
> [  194.141477]  dev_hard_start_xmit+0xc3/0x1e0
> [  194.144622]  sch_direct_xmit+0xe3/0x280
> [  194.147748]  __dev_queue_xmit+0x54a/0xb30
> [  194.154131]  tap_get_user+0x2a8/0x9c0 [tap]
> [  194.157358]  tap_sendmsg+0x52/0x8e0 [tap]
> [  194.167049]  handle_tx_zerocopy+0x14e/0x4c0 [vhost_net]
> [  194.173631]  handle_tx+0xcd/0xe0 [vhost_net]
> [  194.176959]  vhost_worker+0x76/0xb0 [vhost]
> [  194.183667]  kthread+0x118/0x140
> [  194.190358]  ret_from_fork+0x1f/0x30
> [  194.193670]  </TASK>
>
> In this case calling skb_orphan_frags() updated nr_frags leaving nrfrags
> local variable in skb_segment() stale. This resulted in the code hitting
> i >= nrfrags prematurely and trying to move to next frag_skb using
> list_skb pointer, which was NULL, and caused kernel panic. Move the call
> to zero copy functions before using frags and nr_frags.
>
> Fixes: bf5c25d60861 ("skbuff: in skb_segment, call zerocopy functions once per nskb")
> Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
> Reported-by: Amit Goyal <agoyal@purestorage.com>
> Cc: stable@vger.kernel.org
> ---
>  net/core/skbuff.c | 34 ++++++++++++++++++++--------------
>  1 file changed, 20 insertions(+), 14 deletions(-)
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index a298992060e6..18a33dc2d6af 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -4354,21 +4354,20 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>         struct sk_buff *segs = NULL;
>         struct sk_buff *tail = NULL;
>         struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
> -       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
>         unsigned int mss = skb_shinfo(head_skb)->gso_size;
>         unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> -       struct sk_buff *frag_skb = head_skb;
>         unsigned int offset = doffset;
>         unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
>         unsigned int partial_segs = 0;
>         unsigned int headroom;
>         unsigned int len = head_skb->len;
> +       struct sk_buff *frag_skb;
> +       skb_frag_t *frag;
>         __be16 proto;
>         bool csum, sg;
> -       int nfrags = skb_shinfo(head_skb)->nr_frags;
>         int err = -ENOMEM;
>         int i = 0;
> -       int pos;
> +       int nfrags, pos;
>
>         if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
>             mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
> @@ -4445,6 +4444,13 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>         headroom = skb_headroom(head_skb);
>         pos = skb_headlen(head_skb);
>
> +       if (skb_orphan_frags(head_skb, GFP_ATOMIC))
> +               return ERR_PTR(-ENOMEM);
> +
> +       nfrags = skb_shinfo(head_skb)->nr_frags;
> +       frag = skb_shinfo(head_skb)->frags;
> +       frag_skb = head_skb;
> +
>         do {
>                 struct sk_buff *nskb;
>                 skb_frag_t *nskb_frag;
> @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                     (skb_headlen(list_skb) == len || sg)) {
>                         BUG_ON(skb_headlen(list_skb) > len);
>
> +                       nskb = skb_clone(list_skb, GFP_ATOMIC);
> +                       if (unlikely(!nskb))
> +                               goto err;
> +

This patch is quite complex to review, so I am asking if this part was
really needed ?
<1>  : You moved here <2> and <3>

If this is not strictly needed, please keep the code as is to ease
code review...

>                         i = 0;
>                         nfrags = skb_shinfo(list_skb)->nr_frags;
>                         frag = skb_shinfo(list_skb)->frags;
> @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                                 frag++;
>                         }
>
> -                       nskb = skb_clone(list_skb, GFP_ATOMIC);

<2>

>                         list_skb = list_skb->next;
>
> -                       if (unlikely(!nskb))
> -                               goto err;
> -

<3>

>                         if (unlikely(pskb_trim(nskb, len))) {
>                                 kfree_skb(nskb);
>                                 goto err;
> @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                 skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
>                                            SKBFL_SHARED_FRAG;
>
> -               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> -                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> +               if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))

Why using list_skb here instead of frag_skb ?
Again, I have to look at the whole thing to understand why you did this.

>                         goto err;
>
>                 while (pos < offset + len) {
>                         if (i >= nfrags) {
> +                               if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
> +                                   skb_zerocopy_clone(nskb, list_skb,
> +                                                      GFP_ATOMIC))
> +                                       goto err;
> +

This part is fine.

>                                 i = 0;
>                                 nfrags = skb_shinfo(list_skb)->nr_frags;
>                                 frag = skb_shinfo(list_skb)->frags;
> @@ -4583,10 +4593,6 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>                                         i--;
>                                         frag--;
>                                 }
> -                               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> -                                   skb_zerocopy_clone(nskb, frag_skb,
> -                                                      GFP_ATOMIC))
> -                                       goto err;
>
>                                 list_skb = list_skb->next;
>                         }
> --
> 2.17.1
>

Thanks.
Mohamed Khalfella Aug. 31, 2023, 7:29 a.m. UTC | #2
On 2023-08-31 08:58:51 +0200, Eric Dumazet wrote:
> On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
> <mkhalfella@purestorage.com> wrote:
> >         do {
> >                 struct sk_buff *nskb;
> >                 skb_frag_t *nskb_frag;
> > @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >                     (skb_headlen(list_skb) == len || sg)) {
> >                         BUG_ON(skb_headlen(list_skb) > len);
> >
> > +                       nskb = skb_clone(list_skb, GFP_ATOMIC);
> > +                       if (unlikely(!nskb))
> > +                               goto err;
> > +
> 
> This patch is quite complex to review, so I am asking if this part was
> really needed ?

Unfortunately the patch is complex because I try to avoid calling
skb_orphan_frags() in the middle of processing these frags. Otherwise
it would be much harder to implement because as reallocated frags do not
map 1:1 with existing frags as Willem mentioned.

> <1>  : You moved here <2> and <3>

<2> was moved here because skb_clone() calls skb_orphan_frags(). By
moving this up we do not need to call skb_orphan_frags() for list_skb
and we can start to use nr_frags and frags without worrying their value
is going to change.

<3> was moved here because <2> was moved here. Fail fast if we can not
clone list_skb.

> 
> If this is not strictly needed, please keep the code as is to ease
> code review...
> 
> >                         i = 0;
> >                         nfrags = skb_shinfo(list_skb)->nr_frags;
> >                         frag = skb_shinfo(list_skb)->frags;
> > @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >                                 frag++;
> >                         }
> >
> > -                       nskb = skb_clone(list_skb, GFP_ATOMIC);
> 
> <2>
> 
> >                         list_skb = list_skb->next;
> >
> > -                       if (unlikely(!nskb))
> > -                               goto err;
> > -
> 
> <3>
> 
> >                         if (unlikely(pskb_trim(nskb, len))) {
> >                                 kfree_skb(nskb);
> >                                 goto err;
> > @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >                 skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> >                                            SKBFL_SHARED_FRAG;
> >
> > -               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> > -                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> > +               if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
> 
> Why using list_skb here instead of frag_skb ?
> Again, I have to look at the whole thing to understand why you did this.

Oops, this is a mistake. It should be frag_skb. Will fix it run the test
one more time and post v3.
Eric Dumazet Aug. 31, 2023, 7:43 a.m. UTC | #3
On Thu, Aug 31, 2023 at 9:30 AM Mohamed Khalfella
<mkhalfella@purestorage.com> wrote:
>
> On 2023-08-31 08:58:51 +0200, Eric Dumazet wrote:
> > On Thu, Aug 31, 2023 at 1:28 AM Mohamed Khalfella
> > <mkhalfella@purestorage.com> wrote:
> > >         do {
> > >                 struct sk_buff *nskb;
> > >                 skb_frag_t *nskb_frag;
> > > @@ -4465,6 +4471,10 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >                     (skb_headlen(list_skb) == len || sg)) {
> > >                         BUG_ON(skb_headlen(list_skb) > len);
> > >
> > > +                       nskb = skb_clone(list_skb, GFP_ATOMIC);
> > > +                       if (unlikely(!nskb))
> > > +                               goto err;
> > > +
> >
> > This patch is quite complex to review, so I am asking if this part was
> > really needed ?
>
> Unfortunately the patch is complex because I try to avoid calling
> skb_orphan_frags() in the middle of processing these frags. Otherwise
> it would be much harder to implement because as reallocated frags do not
> map 1:1 with existing frags as Willem mentioned.
>
> > <1>  : You moved here <2> and <3>
>
> <2> was moved here because skb_clone() calls skb_orphan_frags(). By

Oh right, I think we should amend skb_clone() documentation, it is
slightly wrong.

( I will take care of this change)

> moving this up we do not need to call skb_orphan_frags() for list_skb
> and we can start to use nr_frags and frags without worrying their value
> is going to change.
>
> <3> was moved here because <2> was moved here. Fail fast if we can not
> clone list_skb.
>
> >
> > If this is not strictly needed, please keep the code as is to ease
> > code review...
> >
> > >                         i = 0;
> > >                         nfrags = skb_shinfo(list_skb)->nr_frags;
> > >                         frag = skb_shinfo(list_skb)->frags;
> > > @@ -4483,12 +4493,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >                                 frag++;
> > >                         }
> > >
> > > -                       nskb = skb_clone(list_skb, GFP_ATOMIC);
> >
> > <2>
> >
> > >                         list_skb = list_skb->next;
> > >
> > > -                       if (unlikely(!nskb))
> > > -                               goto err;
> > > -
> >
> > <3>
> >
> > >                         if (unlikely(pskb_trim(nskb, len))) {
> > >                                 kfree_skb(nskb);
> > >                                 goto err;
> > > @@ -4564,12 +4570,16 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >                 skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
> > >                                            SKBFL_SHARED_FRAG;
> > >
> > > -               if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
> > > -                   skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
> > > +               if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
> >
> > Why using list_skb here instead of frag_skb ?
> > Again, I have to look at the whole thing to understand why you did this.
>
> Oops, this is a mistake. It should be frag_skb. Will fix it run the test
> one more time and post v3.
diff mbox series

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..18a33dc2d6af 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4354,21 +4354,20 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
-	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
 	unsigned int mss = skb_shinfo(head_skb)->gso_size;
 	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
-	struct sk_buff *frag_skb = head_skb;
 	unsigned int offset = doffset;
 	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
 	unsigned int partial_segs = 0;
 	unsigned int headroom;
 	unsigned int len = head_skb->len;
+	struct sk_buff *frag_skb;
+	skb_frag_t *frag;
 	__be16 proto;
 	bool csum, sg;
-	int nfrags = skb_shinfo(head_skb)->nr_frags;
 	int err = -ENOMEM;
 	int i = 0;
-	int pos;
+	int nfrags, pos;
 
 	if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
 	    mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
@@ -4445,6 +4444,13 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);
 
+	if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+		return ERR_PTR(-ENOMEM);
+
+	nfrags = skb_shinfo(head_skb)->nr_frags;
+	frag = skb_shinfo(head_skb)->frags;
+	frag_skb = head_skb;
+
 	do {
 		struct sk_buff *nskb;
 		skb_frag_t *nskb_frag;
@@ -4465,6 +4471,10 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 		    (skb_headlen(list_skb) == len || sg)) {
 			BUG_ON(skb_headlen(list_skb) > len);
 
+			nskb = skb_clone(list_skb, GFP_ATOMIC);
+			if (unlikely(!nskb))
+				goto err;
+
 			i = 0;
 			nfrags = skb_shinfo(list_skb)->nr_frags;
 			frag = skb_shinfo(list_skb)->frags;
@@ -4483,12 +4493,8 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 				frag++;
 			}
 
-			nskb = skb_clone(list_skb, GFP_ATOMIC);
 			list_skb = list_skb->next;
 
-			if (unlikely(!nskb))
-				goto err;
-
 			if (unlikely(pskb_trim(nskb, len))) {
 				kfree_skb(nskb);
 				goto err;
@@ -4564,12 +4570,16 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 		skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
 					   SKBFL_SHARED_FRAG;
 
-		if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-		    skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+		if (skb_zerocopy_clone(nskb, list_skb, GFP_ATOMIC))
 			goto err;
 
 		while (pos < offset + len) {
 			if (i >= nfrags) {
+				if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+				    skb_zerocopy_clone(nskb, list_skb,
+						       GFP_ATOMIC))
+					goto err;
+
 				i = 0;
 				nfrags = skb_shinfo(list_skb)->nr_frags;
 				frag = skb_shinfo(list_skb)->frags;
@@ -4583,10 +4593,6 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 					i--;
 					frag--;
 				}
-				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
-				    skb_zerocopy_clone(nskb, frag_skb,
-						       GFP_ATOMIC))
-					goto err;
 
 				list_skb = list_skb->next;
 			}