Message ID | 77f3e358-c75e-b0bf-ca87-6f8297f5593c@virtuozzo.com (mailing list archive) |
---|---|
State | Not Applicable |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | skbuff: introduce skb_expand_head() | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
(resend without html - thanks gmail web-interface...) On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch <christoph.paasch@gmail.com> wrote: > > Hello, > > On Fri, Aug 6, 2021 at 1:18 AM Vasily Averin <vvs@virtuozzo.com> wrote: > > > > Unlike skb_realloc_headroom, new helper skb_expand_head > > does not allocate a new skb if possible. > > > > Additionally this patch replaces commonly used dereferencing with variables. > > > > Signed-off-by: Vasily Averin <vvs@virtuozzo.com> > > --- > > net/ipv6/ip6_output.c | 27 +++++++++++---------------- > > 1 file changed, 11 insertions(+), 16 deletions(-) > > > > diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c > > index 7d2ec25..f91d13a 100644 > > --- a/net/ipv6/ip6_output.c > > +++ b/net/ipv6/ip6_output.c > > @@ -249,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, > > const struct ipv6_pinfo *np = inet6_sk(sk); > > struct in6_addr *first_hop = &fl6->daddr; > > struct dst_entry *dst = skb_dst(skb); > > + struct net_device *dev = dst->dev; > > + struct inet6_dev *idev = ip6_dst_idev(dst); > > unsigned int head_room; > > struct ipv6hdr *hdr; > > u8 proto = fl6->flowi6_proto; > > @@ -256,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, > > int hlimit = -1; > > u32 mtu; > > > > - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); > > + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); > > if (opt) > > head_room += opt->opt_nflen + opt->opt_flen; > > > > - if (unlikely(skb_headroom(skb) < head_room)) { > > - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); > > - if (!skb2) { > > - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), > > - IPSTATS_MIB_OUTDISCARDS); > > - kfree_skb(skb); > > + if (unlikely(head_room > skb_headroom(skb))) { > > + skb = skb_expand_head(skb, head_room); > > + if (!skb) { > > + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); > > > this change introduces a panic on my syzkaller instance: > > ------------[ cut here ]------------ > WARNING: CPU: 0 PID: 1832 at net/core/skbuff.c:5412 skb_try_coalesce+0x1019/0x12c0 net/core/skbuff.c:5412 > Modules linked in: > CPU: 0 PID: 1832 Comm: syz-executor.0 Not tainted 5.14.0-rc4ab492b0cda378661ae004e2fd66cfd1be474438d #102 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 > RIP: 0010:skb_try_coalesce+0x1019/0x12c0 net/core/skbuff.c:5412 > Code: 24 20 bf 01 00 00 00 8b 40 20 44 0f b7 f0 44 89 f6 e8 ab 41 c0 fe 41 83 ee 01 0f 85 01 f3 ff ff e9 42 f6 ff ff e8 07 3c c0 fe <0f> 0b e9 7b f9 ff ff e8 fb 3b c0 fe 48 8b 44 24 40 48 8d 70 ff 4c > RSP: 0018:ffffc90002d97530 EFLAGS: 00010293 > RAX: 0000000000000000 RBX: 0000000000000e00 RCX: 0000000000000000 > RDX: ffff88810a27bc00 RSI: ffffffff8276b6c9 RDI: 0000000000000003 > RBP: ffff88810a17f9e0 R08: 0000000000000e00 R09: 0000000000000000 > R10: ffffffff8276b042 R11: 0000000000000000 R12: ffff88810a17f760 > R13: ffff888108fc6ac0 R14: 0000000000001000 R15: ffff88810a17f7d6 > FS: 00007f6be8546700(0000) GS:ffff88811b400000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000000000000 CR3: 000000010a4f0005 CR4: 0000000000170ef0 > Call Trace: > tcp_try_coalesce net/ipv4/tcp_input.c:4642 [inline] > tcp_try_coalesce+0x312/0x870 net/ipv4/tcp_input.c:4621 > tcp_queue_rcv+0x73/0x670 net/ipv4/tcp_input.c:4905 > tcp_data_queue+0x11e5/0x4af0 net/ipv4/tcp_input.c:5016 > tcp_rcv_established+0x83a/0x1d30 net/ipv4/tcp_input.c:5928 > tcp_v6_do_rcv+0x438/0x1380 net/ipv6/tcp_ipv6.c:1517 > sk_backlog_rcv include/net/sock.h:1024 [inline] > __release_sock+0x1ad/0x310 net/core/sock.c:2669 > release_sock+0x54/0x1a0 net/core/sock.c:3193 > tcp_sendmsg+0x36/0x40 net/ipv4/tcp.c:1462 > inet6_sendmsg+0xb5/0x140 net/ipv6/af_inet6.c:646 > sock_sendmsg_nosec net/socket.c:704 [inline] > sock_sendmsg net/socket.c:724 [inline] > ____sys_sendmsg+0x3b5/0x970 net/socket.c:2403 > ___sys_sendmsg+0xff/0x170 net/socket.c:2457 > __sys_sendmmsg+0x192/0x440 net/socket.c:2543 > __do_sys_sendmmsg net/socket.c:2572 [inline] > __se_sys_sendmmsg net/socket.c:2569 [inline] > __x64_sys_sendmmsg+0x98/0x100 net/socket.c:2569 > do_syscall_x64 arch/x86/entry/common.c:50 [inline] > do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80 > entry_SYSCALL_64_after_hwframe+0x44/0xae > RIP: 0033:0x7f6be7e55469 > Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ff 49 2b 00 f7 d8 64 89 01 48 > RSP: 002b:00007f6be8545da8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 > RAX: ffffffffffffffda RBX: 0000000000000133 RCX: 00007f6be7e55469 > RDX: 0000000000000003 RSI: 00000000200008c0 RDI: 0000000000000003 > RBP: 0000000000000133 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000040044040 R11: 0000000000000246 R12: 000000000069bf8c > R13: 00007ffe013f968f R14: 00007f6be8526000 R15: 0000000000000003 > ---[ end trace 60453d9d261151ca ]--- > > (syzkaller-reproducer at the end of this email) > > AFAICS, this is because pskb_expand_head (called from skb_expand_head) is not adjusting skb->truesize when skb->sk is set (which I guess is the case in this particular scenario). I'm not sure what the proper fix would be though... > > > Reproducer: > > # {Threaded:true Collide:true Repeat:true RepeatTimes:0 Procs:1 Slowdown:1 Sandbox:none Fault:false FaultCall:-1 FaultNth:0 Leak:false NetInjection:true NetDevices:true NetReset:true Cgroups:true BinfmtMisc:true CloseFDs:true KCSAN:false DevlinkPCI:false USB:false VhciInjection:false Wifi:false IEEE802154:false Sysctl:true UseTmpDir:true HandleSegv:true Repro:false Trace:false} > r0 = socket$inet6_tcp(0xa, 0x1, 0x0) > bind$inet6(r0, &(0x7f0000000080)={0xa, 0x4e22, 0x0, @loopback}, 0x1c) > sendmmsg$inet6(r0, &(0x7f0000002940)=[{{&(0x7f00000000c0)={0xa, 0x4e22, 0x0, @empty}, 0x1c, 0x0}}], 0x36, 0x20000145) > r1 = socket$inet6_icmp_raw(0xa, 0x3, 0x3a) > r2 = socket$inet_tcp(0x2, 0x1, 0x0) > ioctl$sock_SIOCGIFINDEX(r2, 0x8933, 0x0) > setsockopt$inet6_mreq(r1, 0x29, 0x1b, 0x0, 0x0) > sendmmsg$inet6(r0, &(0x7f00000008c0)=[{{0x0, 0x0, &(0x7f0000000240)=[{&(0x7f0000000040)="11c2e854", 0x4}, {0x0}], 0x2}}, {{0x0, 0x0, &(0x7f0000000580)=[{0x0}, {&(0x7f0000001800)="bcfad31884cb1c6226004dd6ed929a7fa308da79249cdfcf5447732df714b21f9fa725d49453be964002a469aff676404855809cf7d7f8fad8ff26c30a9aa692c5ba1c3bf622d795da6efd5425e52f43e8a01e98fec8c4079d0f711b7b02666cc4046eed001f62377a14142ee1004708bc6b57ed028e4a0f8af459fa89ec7cd3098c32cbe69625cd654a3aca8814c9426a817da1b631828f40c0ba2227e8030aabba4eeaeea5617df6fbc54918905137032bb60cb86a7ebd4b8b1f8428085bfc5749a3a986dfca6f0e40b2cc32121cdd53ab4815cdf32dca5d1ee64d0e11894d07ad78f5ee5e7701f4803c49d6fdf927a0c103cf5ba9293e232ebd5661fb950f1cacef864673b79c7a180010889f0a8c9d69a97e3f6a88a9180fcc61631ffa42332f68a8ae78982b5e232decca2a4259a05e96c7fae309468a364798b7343354bdddfe1d81c3393556ced79b92f7c8f9455c1e4deb7cac7d81fdc3d72f201b711a253c2bf4df9f9bcb0ebe51edd3bb62a9440c0c88dc0b7ed6aaa2fdadd868845865cf6c40ba21222123894323fdd0ae8f8fc896a1c4b77431655d77750578db7e6d01380a7b5f5326cd8dd0091ac0a5526b9d1e9318dbfd2ac7227b1779674167c23fa8c59fcc44ab0c117788aa6a1c2baa0fa314093cfa79474e26b09d305c4b20d8f20150feb861ab750f1dbd7c4081ede2b245c14f7152f46352db5b0e4a3e676d23a76ea6363f97a35b3df2ced972f9daec0a5f6f13d5093250a93b31f4fafb1fa783cd99504ba8ac5fbfb4af9ff108a891e3786964ae5f00bc52496db9cf162b0efaded2459a3c1c1fbc60182b62a430a8eef93c6673db2596256f2b85f41975a31908dfd27eced4b169c99fbe79f59e61c8d3aef4e23f3bb665a0f25a09ce19a15f47734b817b1451f32418b1d9d801c64339b4fa2147e794dec9909569abcabeb9785991e32eb9835da449617c9c050000cf83fc6297e908d3708784a9c0599d3df55055123ef6037c5f8223acbeb0413610af573904069b64057d73a95e76f53939d6a144b411d73b7ae09bddc8c28e2484e03f781cce9df821c727facfd7d158a63f466d183e7be32c921176f574258d252bf2c15f1bcb6498ef252315a360ccd4ce7e5388d66462c51885a04211ed9ab097f66e31f6bb704902b74d6afd96f05b7a5de5eaa9d6ba4c700e7ad2e686b7241cf57358937e1b526f34d8522b761ca596b77e74ebae352ffbc9ee9d98b40ea1b1f7e24b3037d0630cd3ede29d4cd960fe0c17102c06e0a7f993b99f961384698dfd82c813cb862779d68fd8f6828a7c6e0bc8f9e3798ca7c3d5f287f8b63b65f6db8fa706e8a378166486b8c8fadfa06a4f0a21e8d6dc8ed230c8b5cde27a48742700b43ba4dce5e3da897b2bf1fc5832d07ac3482a9834b30c0371f517b9f0ae0706a12b87dcb3555e08df00cc0a83d7060c06edd7b8b8c6199398ee6bea33a4690e8f9455f54a6ef9d4b69d39c55a69d6fed3d7a376ceb51bd032b9b76a08ce48c5a2af8702922163a57835133e9968f8a1e4401479ec8a83a42f1c425840cdcd52483d8432c897ae75e8f68df55e5a5c8c16b4d071e92be22eb23f35d58cb86cc1b0c494e96723f5c50e441b81a7f1fca08faaefec8003fddb967381c542c5ac6c18cc7e4f4e61c8aae4ae90f157ae283a9fe8d02b9f52dc7bbf80e47cc6e3e2e6ee9008e8b82cb7ee672b11be1a3874aa63586fb242378f59251990cd8d944d8e46d84e76cfe9dbffe2562d29ed94f5a314062fc8f496bdb38a426f2ab0525418625727f329ca1cb8cd30ae9013829136c025892c2614d1fa0edf019c8e7f3a9ef50ec0483f3d70daec362b603a89b6a7049310b4776e1bb884f8d44a74b2a3da056e3ba8f3425f7a4628f3f140aa12fcd779ea01e22eff2c670759971ed8e11570813835401484537c0075e897863ae690c420b1c4d90ea4e90d84b220511bbba8e21869610e4ff8d2130b09a40a0cf49715275a01ecc438d6fcdfcb189d602d964a1a39ee375067d82c63dc409fab9715ee86cbbf5cdc9b906b66b40e4d2180cd1d03229bd42b2ea6359d8a127d6156d19cc638e4811b55871c39b370121bc0da9b29d3be162572f672f3f1cb9aac5dba4cb9f57062a27b95f5db96b7ea16beb7df8d5642ca2695aa0dccec99bc1b4efbdbe4ceb7febca4b1846b1486ee11383ca9c802428a9a8ac308bfa106dd8a5c87cceb4e1a700626a7ceb074d5628a6be3a96ff59851f797e375f42c7c452d66232f076f5c0045d06f5ee1e39f48b1b4285e569e574136de2d3412dc6f24e5e7024739f1fc5aa71c41e8eaa88c0094ca7ae5b96a20beaec44223d2df1cf650ef924b58e71f7ddf82b18a7d13d8eccfaea41e17ba7c65923d6dea3a7834113f446eaac407c371dc80841a85a4aa0ef2f4d49a204f626ed1c4921d61c50f6bd938359e8fec4e7d03dd6edb0b29437e3e121b65852d5cda472a99a29de0a56db2aa6aca87a9d6bcf1bbe6783735b67807c42a399b0757390d872a26e73244df25b5a6870b2da91e45e5168f8993cf7ca209df04c49cfb6eed24cfc343864cfb997490ac5b0b39d2e3bfa453d54d36546cd034d542ba38f66fe87cd4f89d08ca059e721272648c550481d952a0a47fd15d8fbe4a23303978b813011deef566317f71d9b18a7cb2283c44be2ed1a05ca9c3daf6c28f782f6c3593536d48253243a8995e8b9a30d83b7733bc166d4791bfd1555dcdf44c297ef726aaa47cbebdeae8a7288e20fae87b92fa44c78158501526afc5a7e0041b48c55881f9031d2ae4f482b8f44ce5297113bbd217d081162b26811b4d08f0fb9d76d5f179e5af344f73d62742bf871920121e26796eb67d059b49f5850475e02723c9e84a5578d5610f1cb3b1055b6333d59df391dbf67f6660f2d3aaa9a601d44cec1bb4199b1130d373a5a53ca81e16c374400976b39967bc7310ba801a225e44da319744a1e7e2b2261dedcbb31dda7105de586baf881f88bc7e0069b42fb3ebae8fa63e39609a1f596ba219da2ef61d69976d3175e47769bc1ba2b3cd064db3ff4d7ed0d87ee7054aa7e111aae93592dd6203e11e87bcaf8c43cd803282d0d25ba9b043ea8cbe26f585643d504d6f4f66848c881c066e9f10057e2c773b1e5abf2e56a4f07d35fc29f7fe98dc5fc26507406ca2901ebfae6e68715f6ca4d73555dea5f6ec77661ab9cd7335276eda1f28606e8dbd9b04fe17f53ffc3bedab5d800860f1a2c8bd4909f3b98cc7e7bda7a7e46deeff86c756e3b7d40067ca35f867bb5456ea61599e95916397f72404b2bac726dd5c1a5042eed92bd2988365405d17dde91c214c12263e976356d6131a2a5269cdfb1aefa6dc60b9f522ec2a619b2cc58baf8fadd52f43892d12d27498023e18390db603a3fe5e2f3e14491c1aed2ddaf47e717decfa6877aeed9fea0c575223cb062ceab83a20e99b49a0581e0fb25ba1eae88d5b229eb5320d888652acae4955c3bbe94de43fa99476c9058250847381ee4d85e079c190fadfa0776318ed00f7019a4010f2eb8777c7499456b8748991d4d4c6590cf903a237a81c0af16c4cec87ac49bc7787c2c05ea56501c9cddc2c4ba884ab26a51639e4b77fa5a2488ab842b8ef6d0fb1d3bea710ad1528a1e78a18e37b4a0ed803ba29d9ddf1ca7e8bd56d1588dbb9e3995493959adaa334acd611ac8d266a3199b2ecb9958366d785946722bc9507ab420d447dfadd0274a8e2a25d290c598d65145956c1c8542d07bc0a15efa0a9daed2f2fe71f60b4b657b34a3e897749a49bd85c2a0d915af6a38e8de62df1fcb4e991d04ce4c43eef97c4b3e9de11d528eb9cb923115efc681d0a2a0a714ad923086961c07335e152ec83b65ddb43b8865984c36de76f19bdb6e96566b741fb1c755d012c5377b196bed875e9a08bb7d4fd3160fddc71b9bb55a7199183529cda7a3624077a77b3a2b0367845d23e42dbc4130dcef4cc855556f833b94794b1a4d28d7503113ed4f8ec7b3578b418640d49b01e6a75b456944377a8d892eab38037705e010e32cf7074784dd42f42a92d18c0c26eb4c23a12c1697215aaa41ca92dd59f9816168bc7c9275c256cc689c03809f40125a1c17063a7696ab30dc32d8dcddf8e4e2983a883f11d64b485310c71e5533399d928da94b4ce9a8178517477345bc26b6ba838dfc0b0d9f8542f93183f68e4660e17a8bc92dea290bb718f1d0e89e385ad8ac2f3cc9ad8dfa08e62501b3bd6597574f98bfa39b901daa1e8c56dcddf87d49cf4b1345ae352b1aa72ca62fdeb39abcad462c4045fa1daaba17047d1b91bec23145d4090a7eca6f1510d88beab5a2625fdf773f16293b77eec703bdc13504550f8d74546b89ac056419fb12ded672a2cd4efee157689a704eb8511fab3c12d29e025184b6f329fe4b6226f187b0c009e1b5cfc249791a6c96bee7766276f7e90163f0c11f6c1d04a32e3128d1e3eb95e167f0bdf4b927b775d7fe01aa2174d43a8fb7b8ae4b5c1e6c7e7b7a6bc7f19a70f7ef107a6406e69cb60047aee0d3d0ed4b1a6605bc1eba9ea2664edf145d2422dcc47e26c6f000071a15cbd62c16983bf8a5fef08aeea48d6a477654aacdada917efee28a79897a7b9280025a370d2ccbadd639c3314a7e3cc12298da71e2ca40833f1b88d44e6693049baee1b0789a619815aee3707703963e296be3171854411f874a01f1c6c9f9bb6e4932e13b791280aed0cca54c3dd77033a8c3836a0306e13dabfdfa96ddcb9fa1de806419c1eaac65d7b601417b011ce50fd6257ae97f883faacfc2ce7a5fec477793f2bfa24f51d542f7d5d4828e7efc455af4f4d4e60f72c6598b72751dc058169e5d13b14b91b5849463f688c10d9d4a8f566ee18620a4541dce68411d3746b8b4ef4f891c87cbc536e8e99b828c23732fb1484fd03e23bc0affde95d7c0c6fc0407d1990b55296c4e32f8c0387b965ee20482d9e666f3caa2167218d6a48305b37430835e839b62960fab96ec61e5b49c345fe8c07f7b71ba3b82bef5701de404461a1aaa01331913dc03e27ed36c7f14f0c82057f477b73e9ee20621a5c70df091a2da470db9602aa17f281a15f644a2a85534040dba67d2a6473503453e133098205fb53a3c20fdafa508a82e9c8d172b2eed24afb1c47890ace2eb48ee9e2bc2488e47bf18e69676b32087e7abf3d1b918bd9ac24338d5ae54c0f2c95b0f969f44edac2b552b611ae2b415cfed467ec989e7a139a72b0d3c68e20dd307500bbd7f4e079f6c1486bdb31f648c50c1c4b58e6be618ac6257ddb2558af9f24be2ae415e8edc50197dda4a1178ea4de53bbd66e819173574fecaaa5540189538762f472bd562f1d87f64fc39008d4a3f85cd1b3f01508a42a3047976f6f1dcd4c1d942349967dfd633a6e56de55a5e0f2ad68847e7f1e3a08813dc7483db90cf417e7388aab6e4806628e6b9ab980a12b9ca6b345845f043d9b31cfbba9a17517cb0d421e2db467e991c2584f625dd1884126261e3f455c4f44c15380d43dc7e3e8fc69086395d9535f094432115b3d1b643178aadcf0919d85c3faddbf631ddd50c322a3e489310de21ec2c3026721f9301a34acfe9d65326f7f7ad54b6ad6eaa978b739407105d2d4eb869fff3e2de7585a7fed747493bd65537120cac03e3b48458ddbdbc5ba3382b6040d4863f4d3fd783276e01a3a9a5f05067d1d101ec424e6fb179cf9bcad8f5536b63dd63248ee411ce4b79b70fa7e8a619714646ff2fd557", 0x1000}, {0x0}, {0x0}], 0x4}}, {{0x0, 0x0, 0x0}}], 0x3, 0x40044040) > setsockopt$inet6_IPV6_HOPOPTS(r0, 0x29, 0x36, &(0x7f0000000640)={0x0, 0x11, '\x00', [@calipso={0x7, 0x40, {0x0, 0xe, 0xb, 0x101, [0x5, 0x4ebd, 0x5d, 0x3, 0x80, 0x7, 0x8]}}, @calipso={0x7, 0x10, {0x0, 0x2, 0x6, 0x0, [0x0]}}, @calipso={0x7, 0x28, {0x2, 0x8, 0x9, 0x6, [0x0, 0x80000001, 0x5d53, 0x9]}}, @calipso={0x7, 0x8, {0x2, 0x0, 0x3, 0x5}}]}, 0x90) > > > > Christoph >
On 8/21/21 1:44 AM, Christoph Paasch wrote: > (resend without html - thanks gmail web-interface...) > On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch >> AFAICS, this is because pskb_expand_head (called from >> skb_expand_head) is not adjusting skb->truesize when skb->sk is set >> (which I guess is the case in this particular scenario). I'm not >> sure what the proper fix would be though... Could you please elaborate? it seems to me skb_realloc_headroom used before my patch called pskb_expand_head() too and did not adjusted skb->truesize too. Am I missed something perhaps? The only difference in my patch is that skb_clone can be not called, though I do not understand how this can affect skb->truesize. Thank you, Vasily Averin
Hello Vasily, On Fri, Aug 20, 2021 at 11:21 PM Vasily Averin <vvs@virtuozzo.com> wrote: > > On 8/21/21 1:44 AM, Christoph Paasch wrote: > > (resend without html - thanks gmail web-interface...) > > On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch > >> AFAICS, this is because pskb_expand_head (called from > >> skb_expand_head) is not adjusting skb->truesize when skb->sk is set > >> (which I guess is the case in this particular scenario). I'm not > >> sure what the proper fix would be though... > > Could you please elaborate? > it seems to me skb_realloc_headroom used before my patch called pskb_expand_head() too > and did not adjusted skb->truesize too. Am I missed something perhaps? > > The only difference in my patch is that skb_clone can be not called, > though I do not understand how this can affect skb->truesize. I *believe* that the difference is that after skb_clone() skb->sk is NULL and thus truesize will be adjusted. I will try to confirm that with some more debugging. Christoph > > Thank you, > Vasily Averin
On Sun, Aug 22, 2021 at 10:04 AM Christoph Paasch <christoph.paasch@gmail.com> wrote: > > Hello Vasily, > > On Fri, Aug 20, 2021 at 11:21 PM Vasily Averin <vvs@virtuozzo.com> wrote: > > > > On 8/21/21 1:44 AM, Christoph Paasch wrote: > > > (resend without html - thanks gmail web-interface...) > > > On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch > > >> AFAICS, this is because pskb_expand_head (called from > > >> skb_expand_head) is not adjusting skb->truesize when skb->sk is set > > >> (which I guess is the case in this particular scenario). I'm not > > >> sure what the proper fix would be though... > > > > Could you please elaborate? > > it seems to me skb_realloc_headroom used before my patch called pskb_expand_head() too > > and did not adjusted skb->truesize too. Am I missed something perhaps? > > > > The only difference in my patch is that skb_clone can be not called, > > though I do not understand how this can affect skb->truesize. > > I *believe* that the difference is that after skb_clone() skb->sk is > NULL and thus truesize will be adjusted. > > I will try to confirm that with some more debugging. Yes indeed. Before your patch: [ 19.154039] ip6_xmit before realloc truesize 4864 sk? 000000002ccd6868 [ 19.155230] ip6_xmit after realloc truesize 5376 sk? 0000000000000000 skb->sk is not set and thus truesize will be adjusted. With your change: [ 15.092933] ip6_xmit before realloc truesize 4864 sk? 00000000072930fd [ 15.094131] ip6_xmit after realloc truesize 4864 sk? 00000000072930fd skb->sk is set and thus truesize is not adjusted. Christoph > > > Christoph > > > > > Thank you, > > Vasily Averin
On 8/22/21 8:13 PM, Christoph Paasch wrote: > On Sun, Aug 22, 2021 at 10:04 AM Christoph Paasch > <christoph.paasch@gmail.com> wrote: >> >> Hello Vasily, >> >> On Fri, Aug 20, 2021 at 11:21 PM Vasily Averin <vvs@virtuozzo.com> wrote: >>> >>> On 8/21/21 1:44 AM, Christoph Paasch wrote: >>>> (resend without html - thanks gmail web-interface...) >>>> On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch >>>>> AFAICS, this is because pskb_expand_head (called from >>>>> skb_expand_head) is not adjusting skb->truesize when skb->sk is set >>>>> (which I guess is the case in this particular scenario). I'm not >>>>> sure what the proper fix would be though... >>> >>> Could you please elaborate? >>> it seems to me skb_realloc_headroom used before my patch called pskb_expand_head() too >>> and did not adjusted skb->truesize too. Am I missed something perhaps? >>> >>> The only difference in my patch is that skb_clone can be not called, >>> though I do not understand how this can affect skb->truesize. >> >> I *believe* that the difference is that after skb_clone() skb->sk is >> NULL and thus truesize will be adjusted. >> >> I will try to confirm that with some more debugging. > > Yes indeed. > > Before your patch: > [ 19.154039] ip6_xmit before realloc truesize 4864 sk? 000000002ccd6868 > [ 19.155230] ip6_xmit after realloc truesize 5376 sk? 0000000000000000 > > skb->sk is not set and thus truesize will be adjusted. This looks strange for me. skb should not lost sk reference. Could you please clarify where exactly you cheked it? sk on newly allocated skb is set on line 291 net/ipv6/ip6_output.c::ip6_xmit() 282 if (unlikely(skb_headroom(skb) < head_room)) { 283 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 284 if (!skb2) { 285 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 286 IPSTATS_MIB_OUTDISCARDS); 287 kfree_skb(skb); 288 return -ENOBUFS; 289 } 290 if (skb->sk) 291 skb_set_owner_w(skb2, skb->sk); <<<<< here 292 consume_skb(skb); 293 skb = skb2; 294 } > With your change: > [ 15.092933] ip6_xmit before realloc truesize 4864 sk? 00000000072930fd > [ 15.094131] ip6_xmit after realloc truesize 4864 sk? 00000000072930fd > > skb->sk is set and thus truesize is not adjusted. In this case skb_set_owner_w() is called inside skb_expand_head() net/ipv6/ip6_output.c::ip6_xmit() 265 if (unlikely(head_room > skb_headroom(skb))) { 266 skb = skb_expand_head(skb, head_room); 267 if (!skb) { 268 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 269 return -ENOBUFS; 270 } 271 } net/core/skbuff.c::skb_expand_head() 1813 if (skb_shared(skb)) { 1814 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 1815 1816 if (likely(nskb)) { 1817 if (skb->sk) 1818 skb_set_owner_w(nskb, skb->sk); <<<< here 1819 consume_skb(skb); 1820 } else { 1821 kfree_skb(skb); 1822 } 1823 skb = nskb; 1824 } So I do not understand how this can happen. With my patch: a) if skb is not shared -- it should keep original skb->sk b) if skb is shared -- new skb should set sk if it was set on original skb. Your results can be explained if you looked and skb->sk and truesize right after skb_realloc_headroom() call but before following skb_set_owner_w(). Could you please check it? Thank you, Vasily Averin
On 8/23/21 8:44 AM, Vasily Averin wrote: > On 8/22/21 8:13 PM, Christoph Paasch wrote: >> On Sun, Aug 22, 2021 at 10:04 AM Christoph Paasch >> <christoph.paasch@gmail.com> wrote: >>> >>> Hello Vasily, >>> >>> On Fri, Aug 20, 2021 at 11:21 PM Vasily Averin <vvs@virtuozzo.com> wrote: >>>> >>>> On 8/21/21 1:44 AM, Christoph Paasch wrote: >>>>> (resend without html - thanks gmail web-interface...) >>>>> On Fri, Aug 20, 2021 at 3:41 PM Christoph Paasch >>>>>> AFAICS, this is because pskb_expand_head (called from >>>>>> skb_expand_head) is not adjusting skb->truesize when skb->sk is set >>>>>> (which I guess is the case in this particular scenario). I'm not >>>>>> sure what the proper fix would be though... >>>> >>>> Could you please elaborate? >>>> it seems to me skb_realloc_headroom used before my patch called pskb_expand_head() too >>>> and did not adjusted skb->truesize too. Am I missed something perhaps? >>>> >>>> The only difference in my patch is that skb_clone can be not called, >>>> though I do not understand how this can affect skb->truesize. >>> >>> I *believe* that the difference is that after skb_clone() skb->sk is >>> NULL and thus truesize will be adjusted. >>> >>> I will try to confirm that with some more debugging. >> >> Yes indeed. >> >> Before your patch: >> [ 19.154039] ip6_xmit before realloc truesize 4864 sk? 000000002ccd6868 >> [ 19.155230] ip6_xmit after realloc truesize 5376 sk? 0000000000000000 >> >> skb->sk is not set and thus truesize will be adjusted. > > This looks strange for me. skb should not lost sk reference. > > Could you please clarify where exactly you cheked it? > sk on newly allocated skb is set on line 291 > > net/ipv6/ip6_output.c::ip6_xmit() > 282 if (unlikely(skb_headroom(skb) < head_room)) { > 283 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); > 284 if (!skb2) { > 285 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), > 286 IPSTATS_MIB_OUTDISCARDS); > 287 kfree_skb(skb); > 288 return -ENOBUFS; > 289 } > 290 if (skb->sk) > 291 skb_set_owner_w(skb2, skb->sk); <<<<< here > 292 consume_skb(skb); > 293 skb = skb2; > 294 } > >> With your change: >> [ 15.092933] ip6_xmit before realloc truesize 4864 sk? 00000000072930fd >> [ 15.094131] ip6_xmit after realloc truesize 4864 sk? 00000000072930fd >> >> skb->sk is set and thus truesize is not adjusted. > > In this case skb_set_owner_w() is called inside skb_expand_head() > > net/ipv6/ip6_output.c::ip6_xmit() > 265 if (unlikely(head_room > skb_headroom(skb))) { > 266 skb = skb_expand_head(skb, head_room); > 267 if (!skb) { > 268 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); > 269 return -ENOBUFS; > 270 } > 271 } > > net/core/skbuff.c::skb_expand_head() > 1813 if (skb_shared(skb)) { > 1814 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); > 1815 > 1816 if (likely(nskb)) { > 1817 if (skb->sk) > 1818 skb_set_owner_w(nskb, skb->sk); <<<< here > 1819 consume_skb(skb); > 1820 } else { > 1821 kfree_skb(skb); > 1822 } > 1823 skb = nskb; > 1824 } > > So I do not understand how this can happen. > With my patch: > a) if skb is not shared -- it should keep original skb->sk > b) if skb is shared -- new skb should set sk if it was set on original skb. > > Your results can be explained if you looked and skb->sk and truesize right after skb_realloc_headroom() call > but before following skb_set_owner_w(). Could you please check it? It seems I've found the reason: before my change pskb_expand_head() is called for newly cloned skb where sk was not set. after my change skb->sk is set before following pskb_expand_head() call On own turn pskb_expand_head() adjust truesize: net/core/skbuff.c::pskb_expand_head() 1751 /* It is not generally safe to change skb->truesize. 1752 * For the moment, we really care of rx path, or 1753 * when skb is orphaned (not attached to a socket). 1754 */ 1755 if (!skb->sk || skb->destructor == sock_edemux) 1756 skb->truesize += size - osize; 1757 1758 return 0; Could you please confirm it? Thank you, Vasily Averin
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7d2ec25..f91d13a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -249,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -256,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + if (unlikely(head_room > skb_headroom(skb))) { + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOBUFS; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (opt) { @@ -313,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUT, skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -327,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dst->dev, + net, (struct sock *)sk, skb, NULL, dev, dst_output); } - skb->dev = dst->dev; + skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; }
Unlike skb_realloc_headroom, new helper skb_expand_head does not allocate a new skb if possible. Additionally this patch replaces commonly used dereferencing with variables. Signed-off-by: Vasily Averin <vvs@virtuozzo.com> --- net/ipv6/ip6_output.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-)