Message ID | 20230629203005.2137107-3-eric@garver.life (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: openvswitch: add drop action | expand |
Hi Eric, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Eric-Garver/net-openvswitch-add-drop-reasons/20230630-043307 base: net-next/main patch link: https://lore.kernel.org/r/20230629203005.2137107-3-eric%40garver.life patch subject: [PATCH net-next 2/2] net: openvswitch: add drop action config: mips-randconfig-r036-20230629 (https://download.01.org/0day-ci/archive/20230630/202306300645.towwST4X-lkp@intel.com/config) compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1) reproduce: (https://download.01.org/0day-ci/archive/20230630/202306300645.towwST4X-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306300645.towwST4X-lkp@intel.com/ All errors (new ones prefixed by >>): >> net/openvswitch/actions.c:1483:4: error: expected expression u32 reason = nla_get_u32(a); ^ >> net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason' reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << ^ net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' if (reason == OVS_XLATE_OK) ^ net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason' kfree_skb_reason(skb, reason); ^ 4 errors generated. vim +1483 net/openvswitch/actions.c 1285 1286 /* Execute a list of actions against 'skb'. */ 1287 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, 1288 struct sw_flow_key *key, 1289 const struct nlattr *attr, int len) 1290 { 1291 const struct nlattr *a; 1292 int rem; 1293 1294 for (a = attr, rem = len; rem > 0; 1295 a = nla_next(a, &rem)) { 1296 int err = 0; 1297 1298 if (trace_ovs_do_execute_action_enabled()) 1299 trace_ovs_do_execute_action(dp, skb, key, a, rem); 1300 1301 switch (nla_type(a)) { 1302 case OVS_ACTION_ATTR_OUTPUT: { 1303 int port = nla_get_u32(a); 1304 struct sk_buff *clone; 1305 1306 /* Every output action needs a separate clone 1307 * of 'skb', In case the output action is the 1308 * last action, cloning can be avoided. 1309 */ 1310 if (nla_is_last(a, rem)) { 1311 do_output(dp, skb, port, key); 1312 /* 'skb' has been used for output. 1313 */ 1314 return 0; 1315 } 1316 1317 clone = skb_clone(skb, GFP_ATOMIC); 1318 if (clone) 1319 do_output(dp, clone, port, key); 1320 OVS_CB(skb)->cutlen = 0; 1321 break; 1322 } 1323 1324 case OVS_ACTION_ATTR_TRUNC: { 1325 struct ovs_action_trunc *trunc = nla_data(a); 1326 1327 if (skb->len > trunc->max_len) 1328 OVS_CB(skb)->cutlen = skb->len - trunc->max_len; 1329 break; 1330 } 1331 1332 case OVS_ACTION_ATTR_USERSPACE: 1333 output_userspace(dp, skb, key, a, attr, 1334 len, OVS_CB(skb)->cutlen); 1335 OVS_CB(skb)->cutlen = 0; 1336 break; 1337 1338 case OVS_ACTION_ATTR_HASH: 1339 execute_hash(skb, key, a); 1340 break; 1341 1342 case OVS_ACTION_ATTR_PUSH_MPLS: { 1343 struct ovs_action_push_mpls *mpls = nla_data(a); 1344 1345 err = push_mpls(skb, key, mpls->mpls_lse, 1346 mpls->mpls_ethertype, skb->mac_len); 1347 break; 1348 } 1349 case OVS_ACTION_ATTR_ADD_MPLS: { 1350 struct ovs_action_add_mpls *mpls = nla_data(a); 1351 __u16 mac_len = 0; 1352 1353 if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) 1354 mac_len = skb->mac_len; 1355 1356 err = push_mpls(skb, key, mpls->mpls_lse, 1357 mpls->mpls_ethertype, mac_len); 1358 break; 1359 } 1360 case OVS_ACTION_ATTR_POP_MPLS: 1361 err = pop_mpls(skb, key, nla_get_be16(a)); 1362 break; 1363 1364 case OVS_ACTION_ATTR_PUSH_VLAN: 1365 err = push_vlan(skb, key, nla_data(a)); 1366 break; 1367 1368 case OVS_ACTION_ATTR_POP_VLAN: 1369 err = pop_vlan(skb, key); 1370 break; 1371 1372 case OVS_ACTION_ATTR_RECIRC: { 1373 bool last = nla_is_last(a, rem); 1374 1375 err = execute_recirc(dp, skb, key, a, last); 1376 if (last) { 1377 /* If this is the last action, the skb has 1378 * been consumed or freed. 1379 * Return immediately. 1380 */ 1381 return err; 1382 } 1383 break; 1384 } 1385 1386 case OVS_ACTION_ATTR_SET: 1387 err = execute_set_action(skb, key, nla_data(a)); 1388 break; 1389 1390 case OVS_ACTION_ATTR_SET_MASKED: 1391 case OVS_ACTION_ATTR_SET_TO_MASKED: 1392 err = execute_masked_set_action(skb, key, nla_data(a)); 1393 break; 1394 1395 case OVS_ACTION_ATTR_SAMPLE: { 1396 bool last = nla_is_last(a, rem); 1397 1398 err = sample(dp, skb, key, a, last); 1399 if (last) 1400 return err; 1401 1402 break; 1403 } 1404 1405 case OVS_ACTION_ATTR_CT: 1406 if (!is_flow_key_valid(key)) { 1407 err = ovs_flow_key_update(skb, key); 1408 if (err) 1409 return err; 1410 } 1411 1412 err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, 1413 nla_data(a)); 1414 1415 /* Hide stolen IP fragments from user space. */ 1416 if (err) 1417 return err == -EINPROGRESS ? 0 : err; 1418 break; 1419 1420 case OVS_ACTION_ATTR_CT_CLEAR: 1421 err = ovs_ct_clear(skb, key); 1422 break; 1423 1424 case OVS_ACTION_ATTR_PUSH_ETH: 1425 err = push_eth(skb, key, nla_data(a)); 1426 break; 1427 1428 case OVS_ACTION_ATTR_POP_ETH: 1429 err = pop_eth(skb, key); 1430 break; 1431 1432 case OVS_ACTION_ATTR_PUSH_NSH: { 1433 u8 buffer[NSH_HDR_MAX_LEN]; 1434 struct nshhdr *nh = (struct nshhdr *)buffer; 1435 1436 err = nsh_hdr_from_nlattr(nla_data(a), nh, 1437 NSH_HDR_MAX_LEN); 1438 if (unlikely(err)) 1439 break; 1440 err = push_nsh(skb, key, nh); 1441 break; 1442 } 1443 1444 case OVS_ACTION_ATTR_POP_NSH: 1445 err = pop_nsh(skb, key); 1446 break; 1447 1448 case OVS_ACTION_ATTR_METER: 1449 if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { 1450 consume_skb(skb); 1451 return 0; 1452 } 1453 break; 1454 1455 case OVS_ACTION_ATTR_CLONE: { 1456 bool last = nla_is_last(a, rem); 1457 1458 err = clone(dp, skb, key, a, last); 1459 if (last) 1460 return err; 1461 1462 break; 1463 } 1464 1465 case OVS_ACTION_ATTR_CHECK_PKT_LEN: { 1466 bool last = nla_is_last(a, rem); 1467 1468 err = execute_check_pkt_len(dp, skb, key, a, last); 1469 if (last) 1470 return err; 1471 1472 break; 1473 } 1474 1475 case OVS_ACTION_ATTR_DEC_TTL: 1476 err = execute_dec_ttl(skb, key); 1477 if (err == -EHOSTUNREACH) 1478 return dec_ttl_exception_handler(dp, skb, 1479 key, a); 1480 break; 1481 1482 case OVS_ACTION_ATTR_DROP: > 1483 u32 reason = nla_get_u32(a); 1484 > 1485 reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << 1486 SKB_DROP_REASON_SUBSYS_SHIFT; 1487 1488 if (reason == OVS_XLATE_OK) 1489 break; 1490 1491 kfree_skb_reason(skb, reason); 1492 return 0; 1493 } 1494 1495 if (unlikely(err)) { 1496 kfree_skb(skb); 1497 return err; 1498 } 1499 } 1500 1501 consume_skb(skb); 1502 return 0; 1503 } 1504
Hi Eric, kernel test robot noticed the following build errors: [auto build test ERROR on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/Eric-Garver/net-openvswitch-add-drop-reasons/20230630-043307 base: net-next/main patch link: https://lore.kernel.org/r/20230629203005.2137107-3-eric%40garver.life patch subject: [PATCH net-next 2/2] net: openvswitch: add drop action config: hexagon-randconfig-r045-20230629 (https://download.01.org/0day-ci/archive/20230630/202306300609.tdRdZscy-lkp@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a) reproduce: (https://download.01.org/0day-ci/archive/20230630/202306300609.tdRdZscy-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306300609.tdRdZscy-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from net/openvswitch/actions.c:8: In file included from include/linux/skbuff.h:17: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 547 | val = __raw_readb(PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 560 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); | ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' 37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) | ^ In file included from net/openvswitch/actions.c:8: In file included from include/linux/skbuff.h:17: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 573 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); | ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' 35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) | ^ In file included from net/openvswitch/actions.c:8: In file included from include/linux/skbuff.h:17: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 584 | __raw_writeb(value, PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 594 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); | ~~~~~~~~~~ ^ include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] 604 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); | ~~~~~~~~~~ ^ >> net/openvswitch/actions.c:1483:4: error: expected expression 1483 | u32 reason = nla_get_u32(a); | ^ >> net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason' 1485 | reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << | ^ net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' 1488 | if (reason == OVS_XLATE_OK) | ^ net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason' 1491 | kfree_skb_reason(skb, reason); | ^ 6 warnings and 6 errors generated. vim +1483 net/openvswitch/actions.c 1285 1286 /* Execute a list of actions against 'skb'. */ 1287 static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, 1288 struct sw_flow_key *key, 1289 const struct nlattr *attr, int len) 1290 { 1291 const struct nlattr *a; 1292 int rem; 1293 1294 for (a = attr, rem = len; rem > 0; 1295 a = nla_next(a, &rem)) { 1296 int err = 0; 1297 1298 if (trace_ovs_do_execute_action_enabled()) 1299 trace_ovs_do_execute_action(dp, skb, key, a, rem); 1300 1301 switch (nla_type(a)) { 1302 case OVS_ACTION_ATTR_OUTPUT: { 1303 int port = nla_get_u32(a); 1304 struct sk_buff *clone; 1305 1306 /* Every output action needs a separate clone 1307 * of 'skb', In case the output action is the 1308 * last action, cloning can be avoided. 1309 */ 1310 if (nla_is_last(a, rem)) { 1311 do_output(dp, skb, port, key); 1312 /* 'skb' has been used for output. 1313 */ 1314 return 0; 1315 } 1316 1317 clone = skb_clone(skb, GFP_ATOMIC); 1318 if (clone) 1319 do_output(dp, clone, port, key); 1320 OVS_CB(skb)->cutlen = 0; 1321 break; 1322 } 1323 1324 case OVS_ACTION_ATTR_TRUNC: { 1325 struct ovs_action_trunc *trunc = nla_data(a); 1326 1327 if (skb->len > trunc->max_len) 1328 OVS_CB(skb)->cutlen = skb->len - trunc->max_len; 1329 break; 1330 } 1331 1332 case OVS_ACTION_ATTR_USERSPACE: 1333 output_userspace(dp, skb, key, a, attr, 1334 len, OVS_CB(skb)->cutlen); 1335 OVS_CB(skb)->cutlen = 0; 1336 break; 1337 1338 case OVS_ACTION_ATTR_HASH: 1339 execute_hash(skb, key, a); 1340 break; 1341 1342 case OVS_ACTION_ATTR_PUSH_MPLS: { 1343 struct ovs_action_push_mpls *mpls = nla_data(a); 1344 1345 err = push_mpls(skb, key, mpls->mpls_lse, 1346 mpls->mpls_ethertype, skb->mac_len); 1347 break; 1348 } 1349 case OVS_ACTION_ATTR_ADD_MPLS: { 1350 struct ovs_action_add_mpls *mpls = nla_data(a); 1351 __u16 mac_len = 0; 1352 1353 if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) 1354 mac_len = skb->mac_len; 1355 1356 err = push_mpls(skb, key, mpls->mpls_lse, 1357 mpls->mpls_ethertype, mac_len); 1358 break; 1359 } 1360 case OVS_ACTION_ATTR_POP_MPLS: 1361 err = pop_mpls(skb, key, nla_get_be16(a)); 1362 break; 1363 1364 case OVS_ACTION_ATTR_PUSH_VLAN: 1365 err = push_vlan(skb, key, nla_data(a)); 1366 break; 1367 1368 case OVS_ACTION_ATTR_POP_VLAN: 1369 err = pop_vlan(skb, key); 1370 break; 1371 1372 case OVS_ACTION_ATTR_RECIRC: { 1373 bool last = nla_is_last(a, rem); 1374 1375 err = execute_recirc(dp, skb, key, a, last); 1376 if (last) { 1377 /* If this is the last action, the skb has 1378 * been consumed or freed. 1379 * Return immediately. 1380 */ 1381 return err; 1382 } 1383 break; 1384 } 1385 1386 case OVS_ACTION_ATTR_SET: 1387 err = execute_set_action(skb, key, nla_data(a)); 1388 break; 1389 1390 case OVS_ACTION_ATTR_SET_MASKED: 1391 case OVS_ACTION_ATTR_SET_TO_MASKED: 1392 err = execute_masked_set_action(skb, key, nla_data(a)); 1393 break; 1394 1395 case OVS_ACTION_ATTR_SAMPLE: { 1396 bool last = nla_is_last(a, rem); 1397 1398 err = sample(dp, skb, key, a, last); 1399 if (last) 1400 return err; 1401 1402 break; 1403 } 1404 1405 case OVS_ACTION_ATTR_CT: 1406 if (!is_flow_key_valid(key)) { 1407 err = ovs_flow_key_update(skb, key); 1408 if (err) 1409 return err; 1410 } 1411 1412 err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key, 1413 nla_data(a)); 1414 1415 /* Hide stolen IP fragments from user space. */ 1416 if (err) 1417 return err == -EINPROGRESS ? 0 : err; 1418 break; 1419 1420 case OVS_ACTION_ATTR_CT_CLEAR: 1421 err = ovs_ct_clear(skb, key); 1422 break; 1423 1424 case OVS_ACTION_ATTR_PUSH_ETH: 1425 err = push_eth(skb, key, nla_data(a)); 1426 break; 1427 1428 case OVS_ACTION_ATTR_POP_ETH: 1429 err = pop_eth(skb, key); 1430 break; 1431 1432 case OVS_ACTION_ATTR_PUSH_NSH: { 1433 u8 buffer[NSH_HDR_MAX_LEN]; 1434 struct nshhdr *nh = (struct nshhdr *)buffer; 1435 1436 err = nsh_hdr_from_nlattr(nla_data(a), nh, 1437 NSH_HDR_MAX_LEN); 1438 if (unlikely(err)) 1439 break; 1440 err = push_nsh(skb, key, nh); 1441 break; 1442 } 1443 1444 case OVS_ACTION_ATTR_POP_NSH: 1445 err = pop_nsh(skb, key); 1446 break; 1447 1448 case OVS_ACTION_ATTR_METER: 1449 if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { 1450 consume_skb(skb); 1451 return 0; 1452 } 1453 break; 1454 1455 case OVS_ACTION_ATTR_CLONE: { 1456 bool last = nla_is_last(a, rem); 1457 1458 err = clone(dp, skb, key, a, last); 1459 if (last) 1460 return err; 1461 1462 break; 1463 } 1464 1465 case OVS_ACTION_ATTR_CHECK_PKT_LEN: { 1466 bool last = nla_is_last(a, rem); 1467 1468 err = execute_check_pkt_len(dp, skb, key, a, last); 1469 if (last) 1470 return err; 1471 1472 break; 1473 } 1474 1475 case OVS_ACTION_ATTR_DEC_TTL: 1476 err = execute_dec_ttl(skb, key); 1477 if (err == -EHOSTUNREACH) 1478 return dec_ttl_exception_handler(dp, skb, 1479 key, a); 1480 break; 1481 1482 case OVS_ACTION_ATTR_DROP: > 1483 u32 reason = nla_get_u32(a); 1484 > 1485 reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << 1486 SKB_DROP_REASON_SUBSYS_SHIFT; 1487 1488 if (reason == OVS_XLATE_OK) 1489 break; 1490 1491 kfree_skb_reason(skb, reason); 1492 return 0; 1493 } 1494 1495 if (unlikely(err)) { 1496 kfree_skb(skb); 1497 return err; 1498 } 1499 } 1500 1501 consume_skb(skb); 1502 return 0; 1503 } 1504
On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote: > This adds an explicit drop action. This is used by OVS to drop packets > for which it cannot determine what to do. An explicit action in the > kernel allows passing the reason _why_ the packet is being dropped. We > can then use perf tracing to match on the drop reason. > > e.g. trace all OVS dropped skbs > > # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" > [..] > 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ > location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) > > Signed-off-by: Eric Garver <eric@garver.life> ... > --- a/net/openvswitch/actions.c > +++ b/net/openvswitch/actions.c > @@ -32,6 +32,7 @@ > #include "vport.h" > #include "flow_netlink.h" > #include "openvswitch_trace.h" > +#include "drop.h" > > struct deferred_action { > struct sk_buff *skb; > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > return dec_ttl_exception_handler(dp, skb, > key, a); > break; > + > + case OVS_ACTION_ATTR_DROP: > + u32 reason = nla_get_u32(a); > + > + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > + SKB_DROP_REASON_SUBSYS_SHIFT; > + > + if (reason == OVS_XLATE_OK) > + break; > + > + kfree_skb_reason(skb, reason); > + return 0; > } Hi Eric, thanks for your patches. This is an interesting new feature. unfortunately clang-16 doesn't seem to like this very much. I think that it is due to the declaration of reason not being enclosed in a block - { }. net/openvswitch/actions.c:1483:4: error: expected expression u32 reason = nla_get_u32(a); ^ net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason' reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << ^ net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' if (reason == OVS_XLATE_OK) ^ net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason' kfree_skb_reason(skb, reason); ^ 4 errors generated. net-next is currently closed. So please provide a v2 once it reposts, after 10th July. See: https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#development-cycle
On Fri, Jun 30, 2023 at 11:47:04AM +0200, Simon Horman wrote: > On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote: > > This adds an explicit drop action. This is used by OVS to drop packets > > for which it cannot determine what to do. An explicit action in the > > kernel allows passing the reason _why_ the packet is being dropped. We > > can then use perf tracing to match on the drop reason. > > > > e.g. trace all OVS dropped skbs > > > > # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" > > [..] > > 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ > > location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) > > > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) > > > > Signed-off-by: Eric Garver <eric@garver.life> > > ... > > > --- a/net/openvswitch/actions.c > > +++ b/net/openvswitch/actions.c > > @@ -32,6 +32,7 @@ > > #include "vport.h" > > #include "flow_netlink.h" > > #include "openvswitch_trace.h" > > +#include "drop.h" > > > > struct deferred_action { > > struct sk_buff *skb; > > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > > return dec_ttl_exception_handler(dp, skb, > > key, a); > > break; > > + > > + case OVS_ACTION_ATTR_DROP: > > + u32 reason = nla_get_u32(a); > > + > > + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > > + SKB_DROP_REASON_SUBSYS_SHIFT; > > + > > + if (reason == OVS_XLATE_OK) > > + break; > > + > > + kfree_skb_reason(skb, reason); > > + return 0; > > } > > Hi Eric, > > thanks for your patches. This is an interesting new feature. > > unfortunately clang-16 doesn't seem to like this very much. > I think that it is due to the declaration of reason not > being enclosed in a block - { }. > > net/openvswitch/actions.c:1483:4: error: expected expression > u32 reason = nla_get_u32(a); > ^ > net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason' > reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > ^ > net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' > if (reason == OVS_XLATE_OK) > ^ > net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason' > kfree_skb_reason(skb, reason); > ^ > 4 errors generated. > > > net-next is currently closed. > So please provide a v2 once it reposts, after 10th July. oof. My bad. I'll fix the clang issue and post v2 in a couple weeks. Thanks. Eric.
On Fri, Jun 30, 2023 at 08:29:58AM -0400, Eric Garver wrote: > On Fri, Jun 30, 2023 at 11:47:04AM +0200, Simon Horman wrote: > > On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote: > > > This adds an explicit drop action. This is used by OVS to drop packets > > > for which it cannot determine what to do. An explicit action in the > > > kernel allows passing the reason _why_ the packet is being dropped. We > > > can then use perf tracing to match on the drop reason. > > > > > > e.g. trace all OVS dropped skbs > > > > > > # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" > > > [..] > > > 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ > > > location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) > > > > > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) > > > > > > Signed-off-by: Eric Garver <eric@garver.life> > > > > ... > > > > > --- a/net/openvswitch/actions.c > > > +++ b/net/openvswitch/actions.c > > > @@ -32,6 +32,7 @@ > > > #include "vport.h" > > > #include "flow_netlink.h" > > > #include "openvswitch_trace.h" > > > +#include "drop.h" > > > > > > struct deferred_action { > > > struct sk_buff *skb; > > > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > > > return dec_ttl_exception_handler(dp, skb, > > > key, a); > > > break; > > > + > > > + case OVS_ACTION_ATTR_DROP: > > > + u32 reason = nla_get_u32(a); > > > + > > > + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > > > + SKB_DROP_REASON_SUBSYS_SHIFT; > > > + > > > + if (reason == OVS_XLATE_OK) > > > + break; > > > + > > > + kfree_skb_reason(skb, reason); > > > + return 0; > > > } > > > > Hi Eric, > > > > thanks for your patches. This is an interesting new feature. > > > > unfortunately clang-16 doesn't seem to like this very much. > > I think that it is due to the declaration of reason not > > being enclosed in a block - { }. > > > > net/openvswitch/actions.c:1483:4: error: expected expression > > u32 reason = nla_get_u32(a); > > ^ > > net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason' > > reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > > ^ > > net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason' > > if (reason == OVS_XLATE_OK) > > ^ > > net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason' > > kfree_skb_reason(skb, reason); > > ^ > > 4 errors generated. > > > > > > net-next is currently closed. > > So please provide a v2 once it reposts, after 10th July. > > oof. My bad. I'll fix the clang issue and post v2 in a couple weeks. Thanks Eric, much appreciated.
Eric Garver <eric@garver.life> writes: > This adds an explicit drop action. This is used by OVS to drop packets > for which it cannot determine what to do. An explicit action in the > kernel allows passing the reason _why_ the packet is being dropped. We > can then use perf tracing to match on the drop reason. > > e.g. trace all OVS dropped skbs > > # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" > [..] > 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ > location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) > > Signed-off-by: Eric Garver <eric@garver.life> > --- > include/uapi/linux/openvswitch.h | 2 ++ > net/openvswitch/actions.c | 13 +++++++++++++ > net/openvswitch/flow_netlink.c | 12 +++++++++++- > .../testing/selftests/net/openvswitch/ovs-dpctl.py | 3 +++ > 4 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h > index e94870e77ee9..a967dbca3574 100644 > --- a/include/uapi/linux/openvswitch.h > +++ b/include/uapi/linux/openvswitch.h > @@ -965,6 +965,7 @@ struct check_pkt_len_arg { > * start of the packet or at the start of the l3 header depending on the value > * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS > * argument. > + * @OVS_ACTION_ATTR_DROP: Explicit drop action. > * > * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all > * fields within a header are modifiable, e.g. the IPv4 protocol and fragment > @@ -1002,6 +1003,7 @@ enum ovs_action_attr { > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ > OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ > OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ > + OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ > > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted > * from userspace. */ > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c > index cab1e02b63e0..4ad9a45dc042 100644 > --- a/net/openvswitch/actions.c > +++ b/net/openvswitch/actions.c > @@ -32,6 +32,7 @@ > #include "vport.h" > #include "flow_netlink.h" > #include "openvswitch_trace.h" > +#include "drop.h" > > struct deferred_action { > struct sk_buff *skb; > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > return dec_ttl_exception_handler(dp, skb, > key, a); > break; > + > + case OVS_ACTION_ATTR_DROP: > + u32 reason = nla_get_u32(a); > + > + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > + SKB_DROP_REASON_SUBSYS_SHIFT; > + > + if (reason == OVS_XLATE_OK) > + break; > + > + kfree_skb_reason(skb, reason); > + return 0; > } > > if (unlikely(err)) { > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > index 41116361433d..23d39eae9a0d 100644 > --- a/net/openvswitch/flow_netlink.c > +++ b/net/openvswitch/flow_netlink.c > @@ -39,6 +39,7 @@ > #include <net/erspan.h> > > #include "flow_netlink.h" > +#include "drop.h" > > struct ovs_len_tbl { > int len; > @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) > case OVS_ACTION_ATTR_RECIRC: > case OVS_ACTION_ATTR_TRUNC: > case OVS_ACTION_ATTR_USERSPACE: > + case OVS_ACTION_ATTR_DROP: > break; > > case OVS_ACTION_ATTR_CT: > @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) > /* Whenever new actions are added, the need to update this > * function should be considered. > */ > - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); > + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); > > if (!actions) > return; > @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, > [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), > [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, > + [OVS_ACTION_ATTR_DROP] = sizeof(u32), > }; > const struct ovs_action_push_vlan *vlan; > int type = nla_type(a); > @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > skip_copy = true; > break; > > + case OVS_ACTION_ATTR_DROP: > + if (nla_get_u32(a) >= > + u32_get_bits(OVS_XLATE_MAX, > + ~SKB_DROP_REASON_SUBSYS_MASK)) > + return -EINVAL; > + break; > + If there's a case where the userspace sends a drop reason that isn't known to the kernel, we will reject the flow, and the only "close" drop will be OVS_XLATE_OK, which would be wrong. Is there a reason to do this? For example, userspace might get new support for some kind of flows and during that time might have a new xlate drop reason. Maybe we can have a reason code that OVS knows will exist, so that if this fails, it can at least fall back to that? > default: > OVS_NLERR(log, "Unknown Action type %d", type); > return -EINVAL; > diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > index 1c8b36bc15d4..526ebad7d514 100644 > --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > @@ -115,6 +115,7 @@ class ovsactions(nla): > ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), > ("OVS_ACTION_ATTR_ADD_MPLS", "none"), > ("OVS_ACTION_ATTR_DEC_TTL", "none"), > + ("OVS_ACTION_ATTR_DROP", "uint32"), > ) > > class ctact(nla): > @@ -261,6 +262,8 @@ class ovsactions(nla): > print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) > elif field[0] == "OVS_ACTION_ATTR_TRUNC": > print_str += "trunc(%d)" % int(self.get_attr(field[0])) > + elif field[0] == "OVS_ACTION_ATTR_DROP": > + print_str += "drop" Can we also include the reason here? > elif field[1] == "flag": > if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": > print_str += "ct_clear"
On Thu, Jul 06, 2023 at 08:54:16AM -0400, Aaron Conole wrote: > Eric Garver <eric@garver.life> writes: > > > This adds an explicit drop action. This is used by OVS to drop packets > > for which it cannot determine what to do. An explicit action in the > > kernel allows passing the reason _why_ the packet is being dropped. We > > can then use perf tracing to match on the drop reason. > > > > e.g. trace all OVS dropped skbs > > > > # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" > > [..] > > 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ > > location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) > > > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) > > > > Signed-off-by: Eric Garver <eric@garver.life> > > --- > > include/uapi/linux/openvswitch.h | 2 ++ > > net/openvswitch/actions.c | 13 +++++++++++++ > > net/openvswitch/flow_netlink.c | 12 +++++++++++- > > .../testing/selftests/net/openvswitch/ovs-dpctl.py | 3 +++ > > 4 files changed, 29 insertions(+), 1 deletion(-) > > > > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h > > index e94870e77ee9..a967dbca3574 100644 > > --- a/include/uapi/linux/openvswitch.h > > +++ b/include/uapi/linux/openvswitch.h > > @@ -965,6 +965,7 @@ struct check_pkt_len_arg { > > * start of the packet or at the start of the l3 header depending on the value > > * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS > > * argument. > > + * @OVS_ACTION_ATTR_DROP: Explicit drop action. > > * > > * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all > > * fields within a header are modifiable, e.g. the IPv4 protocol and fragment > > @@ -1002,6 +1003,7 @@ enum ovs_action_attr { > > OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ > > OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ > > OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ > > + OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ > > > > __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted > > * from userspace. */ > > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c > > index cab1e02b63e0..4ad9a45dc042 100644 > > --- a/net/openvswitch/actions.c > > +++ b/net/openvswitch/actions.c > > @@ -32,6 +32,7 @@ > > #include "vport.h" > > #include "flow_netlink.h" > > #include "openvswitch_trace.h" > > +#include "drop.h" > > > > struct deferred_action { > > struct sk_buff *skb; > > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, > > return dec_ttl_exception_handler(dp, skb, > > key, a); > > break; > > + > > + case OVS_ACTION_ATTR_DROP: > > + u32 reason = nla_get_u32(a); > > + > > + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << > > + SKB_DROP_REASON_SUBSYS_SHIFT; > > + > > + if (reason == OVS_XLATE_OK) > > + break; > > + > > + kfree_skb_reason(skb, reason); > > + return 0; > > } > > > > if (unlikely(err)) { > > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c > > index 41116361433d..23d39eae9a0d 100644 > > --- a/net/openvswitch/flow_netlink.c > > +++ b/net/openvswitch/flow_netlink.c > > @@ -39,6 +39,7 @@ > > #include <net/erspan.h> > > > > #include "flow_netlink.h" > > +#include "drop.h" > > > > struct ovs_len_tbl { > > int len; > > @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) > > case OVS_ACTION_ATTR_RECIRC: > > case OVS_ACTION_ATTR_TRUNC: > > case OVS_ACTION_ATTR_USERSPACE: > > + case OVS_ACTION_ATTR_DROP: > > break; > > > > case OVS_ACTION_ATTR_CT: > > @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) > > /* Whenever new actions are added, the need to update this > > * function should be considered. > > */ > > - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); > > + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); > > > > if (!actions) > > return; > > @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, > > [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), > > [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, > > + [OVS_ACTION_ATTR_DROP] = sizeof(u32), > > }; > > const struct ovs_action_push_vlan *vlan; > > int type = nla_type(a); > > @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, > > skip_copy = true; > > break; > > > > + case OVS_ACTION_ATTR_DROP: > > + if (nla_get_u32(a) >= > > + u32_get_bits(OVS_XLATE_MAX, > > + ~SKB_DROP_REASON_SUBSYS_MASK)) > > + return -EINVAL; > > + break; > > + > > If there's a case where the userspace sends a drop reason that isn't > known to the kernel, we will reject the flow, and the only "close" drop > will be OVS_XLATE_OK, which would be wrong. Is there a reason to do > this? For example, userspace might get new support for some kind of > flows and during that time might have a new xlate drop reason. Maybe we > can have a reason code that OVS knows will exist, so that if this fails, > it can at least fall back to that? You're correct. It will reject the flow. Maybe we clamp the value to OVS_XLATE_MAX if it's unknown. That makes the skb drop reason less helpful, but no less helpful than today ;). At least we won't reject the flow. We could alias OVS_XLATE_MAX to OVS_XLATE_UNKNOWN. I prefer an explicit value for OVS_XLATE_UNKNOWN, e.g. (u16)-1. > > default: > > OVS_NLERR(log, "Unknown Action type %d", type); > > return -EINVAL; > > diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > > index 1c8b36bc15d4..526ebad7d514 100644 > > --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > > +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py > > @@ -115,6 +115,7 @@ class ovsactions(nla): > > ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), > > ("OVS_ACTION_ATTR_ADD_MPLS", "none"), > > ("OVS_ACTION_ATTR_DEC_TTL", "none"), > > + ("OVS_ACTION_ATTR_DROP", "uint32"), > > ) > > > > class ctact(nla): > > @@ -261,6 +262,8 @@ class ovsactions(nla): > > print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) > > elif field[0] == "OVS_ACTION_ATTR_TRUNC": > > print_str += "trunc(%d)" % int(self.get_attr(field[0])) > > + elif field[0] == "OVS_ACTION_ATTR_DROP": > > + print_str += "drop" > > Can we also include the reason here? It could. This mimics dpctl output, which does currently not include the reason. So I went with parity. I have proposed adding the reason to all dpctl "drop" output, but was planning on that being a follow up to this work. > > elif field[1] == "flag": > > if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": > > print_str += "ct_clear" > >
On 7/6/23 15:57, Eric Garver wrote: > On Thu, Jul 06, 2023 at 08:54:16AM -0400, Aaron Conole wrote: >> Eric Garver <eric@garver.life> writes: >> >>> This adds an explicit drop action. This is used by OVS to drop packets >>> for which it cannot determine what to do. An explicit action in the >>> kernel allows passing the reason _why_ the packet is being dropped. We >>> can then use perf tracing to match on the drop reason. >>> >>> e.g. trace all OVS dropped skbs >>> >>> # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" >>> [..] >>> 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ >>> location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) >>> >>> reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) >>> >>> Signed-off-by: Eric Garver <eric@garver.life> >>> --- >>> include/uapi/linux/openvswitch.h | 2 ++ >>> net/openvswitch/actions.c | 13 +++++++++++++ >>> net/openvswitch/flow_netlink.c | 12 +++++++++++- >>> .../testing/selftests/net/openvswitch/ovs-dpctl.py | 3 +++ >>> 4 files changed, 29 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h >>> index e94870e77ee9..a967dbca3574 100644 >>> --- a/include/uapi/linux/openvswitch.h >>> +++ b/include/uapi/linux/openvswitch.h >>> @@ -965,6 +965,7 @@ struct check_pkt_len_arg { >>> * start of the packet or at the start of the l3 header depending on the value >>> * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS >>> * argument. >>> + * @OVS_ACTION_ATTR_DROP: Explicit drop action. >>> * >>> * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all >>> * fields within a header are modifiable, e.g. the IPv4 protocol and fragment >>> @@ -1002,6 +1003,7 @@ enum ovs_action_attr { >>> OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ >>> OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ >>> OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ >>> + OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ >>> >>> __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted >>> * from userspace. */ >>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c >>> index cab1e02b63e0..4ad9a45dc042 100644 >>> --- a/net/openvswitch/actions.c >>> +++ b/net/openvswitch/actions.c >>> @@ -32,6 +32,7 @@ >>> #include "vport.h" >>> #include "flow_netlink.h" >>> #include "openvswitch_trace.h" >>> +#include "drop.h" >>> >>> struct deferred_action { >>> struct sk_buff *skb; >>> @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, >>> return dec_ttl_exception_handler(dp, skb, >>> key, a); >>> break; >>> + >>> + case OVS_ACTION_ATTR_DROP: >>> + u32 reason = nla_get_u32(a); >>> + >>> + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << >>> + SKB_DROP_REASON_SUBSYS_SHIFT; >>> + >>> + if (reason == OVS_XLATE_OK) >>> + break; >>> + >>> + kfree_skb_reason(skb, reason); >>> + return 0; >>> } >>> >>> if (unlikely(err)) { >>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c >>> index 41116361433d..23d39eae9a0d 100644 >>> --- a/net/openvswitch/flow_netlink.c >>> +++ b/net/openvswitch/flow_netlink.c >>> @@ -39,6 +39,7 @@ >>> #include <net/erspan.h> >>> >>> #include "flow_netlink.h" >>> +#include "drop.h" >>> >>> struct ovs_len_tbl { >>> int len; >>> @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) >>> case OVS_ACTION_ATTR_RECIRC: >>> case OVS_ACTION_ATTR_TRUNC: >>> case OVS_ACTION_ATTR_USERSPACE: >>> + case OVS_ACTION_ATTR_DROP: >>> break; >>> >>> case OVS_ACTION_ATTR_CT: >>> @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) >>> /* Whenever new actions are added, the need to update this >>> * function should be considered. >>> */ >>> - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); >>> + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); >>> >>> if (!actions) >>> return; >>> @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, >>> [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, >>> [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), >>> [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, >>> + [OVS_ACTION_ATTR_DROP] = sizeof(u32), >>> }; >>> const struct ovs_action_push_vlan *vlan; >>> int type = nla_type(a); >>> @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, >>> skip_copy = true; >>> break; >>> >>> + case OVS_ACTION_ATTR_DROP: >>> + if (nla_get_u32(a) >= >>> + u32_get_bits(OVS_XLATE_MAX, >>> + ~SKB_DROP_REASON_SUBSYS_MASK)) >>> + return -EINVAL; >>> + break; >>> + >> >> If there's a case where the userspace sends a drop reason that isn't >> known to the kernel, we will reject the flow, and the only "close" drop >> will be OVS_XLATE_OK, which would be wrong. Is there a reason to do >> this? For example, userspace might get new support for some kind of >> flows and during that time might have a new xlate drop reason. Maybe we >> can have a reason code that OVS knows will exist, so that if this fails, >> it can at least fall back to that? > > You're correct. It will reject the flow. > > Maybe we clamp the value to OVS_XLATE_MAX if it's unknown. That makes > the skb drop reason less helpful, but no less helpful than today ;). At > least we won't reject the flow. > > We could alias OVS_XLATE_MAX to OVS_XLATE_UNKNOWN. I prefer an explicit > value for OVS_XLATE_UNKNOWN, e.g. (u16)-1. A wild idea: How about we do not define actual reasons? i.e. define a subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where 'value' is whatever userspace gives as long as it is within a subsystem range? The point is: drop reasons are not part of the uAPI, but by defining drop reasons for openvswitch we're making this subset of drop reasons part of the uAPI. And that seems a bit shady. Users can't really rely on actual values of drop reasons anyway, because the subsystem offset will not be part of the uAPI. And it doesn't matter if they need to get them from the kernel binary or from the userspace OVS binary. So, it might be cleaner to not define them in the first place. Thoughts? CC: kernel maintainers Best regards, Ilya Maximets.
On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote: > A wild idea: How about we do not define actual reasons? i.e. define a > subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where > 'value' is whatever userspace gives as long as it is within a subsystem > range? That already exists, right? Johannes added it in the last release for WiFi.
On 7/7/23 17:00, Jakub Kicinski wrote: > On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote: >> A wild idea: How about we do not define actual reasons? i.e. define a >> subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where >> 'value' is whatever userspace gives as long as it is within a subsystem >> range? > > That already exists, right? Johannes added it in the last release for WiFi. I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly to that on a surface. However, looking closer, any value that can be passed into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is kind of defined in net/mac80211/drop.h, unless I'm missing something (very possible, because I don't really know wifi code). The difference, I guess, is that for openvswitch values will be provided by the userpsace application via netlink interface. It'll be just a number not defined anywhere in the kernel. Only the subsystem itself will be defined in order to occupy the range. Garbage in, same garbage out, from the kernel's perspective. Best regards, Ilya Maximets.
On 7/7/23 17:29, Ilya Maximets wrote: > On 7/7/23 17:00, Jakub Kicinski wrote: >> On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote: >>> A wild idea: How about we do not define actual reasons? i.e. define a >>> subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where >>> 'value' is whatever userspace gives as long as it is within a subsystem >>> range? >> >> That already exists, right? Johannes added it in the last release for WiFi. > > I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly > to that on a surface. However, looking closer, any value that can be passed > into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is > kind of defined in net/mac80211/drop.h, unless I'm missing something (very > possible, because I don't really know wifi code). > > The difference, I guess, is that for openvswitch values will be provided by > the userpsace application via netlink interface. It'll be just a number not > defined anywhere in the kernel. Only the subsystem itself will be defined > in order to occupy the range. Garbage in, same garbage out, from the kernel's > perspective. To be clear, I think, not defining them in this particular case is better. Definition of every reason that userspace can come up with will add extra uAPI maintenance cost/issues with no practical benefits. Values are not going to be used for anything outside reporting a drop reason and subsystem offset is not part of uAPI anyway. > > Best regards, Ilya Maximets.
On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote: > >> That already exists, right? Johannes added it in the last release for WiFi. > > > > I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly > > to that on a surface. However, looking closer, any value that can be passed > > into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is > > kind of defined in net/mac80211/drop.h, unless I'm missing something (very > > possible, because I don't really know wifi code). > > > > The difference, I guess, is that for openvswitch values will be provided by > > the userpsace application via netlink interface. It'll be just a number not > > defined anywhere in the kernel. Only the subsystem itself will be defined > > in order to occupy the range. Garbage in, same garbage out, from the kernel's > > perspective. > > To be clear, I think, not defining them in this particular case is better. > Definition of every reason that userspace can come up with will add extra > uAPI maintenance cost/issues with no practical benefits. Values are not > going to be used for anything outside reporting a drop reason and subsystem > offset is not part of uAPI anyway. Ah, I see. No, please don't stuff user space defined values into the drop reason. The reasons are for debugging the kernel stack itself. IOW it'd be abuse not reuse.
On 7/8/23 00:06, Jakub Kicinski wrote: > On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote: >>>> That already exists, right? Johannes added it in the last release for WiFi. >>> >>> I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly >>> to that on a surface. However, looking closer, any value that can be passed >>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is >>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very >>> possible, because I don't really know wifi code). >>> >>> The difference, I guess, is that for openvswitch values will be provided by >>> the userpsace application via netlink interface. It'll be just a number not >>> defined anywhere in the kernel. Only the subsystem itself will be defined >>> in order to occupy the range. Garbage in, same garbage out, from the kernel's >>> perspective. >> >> To be clear, I think, not defining them in this particular case is better. >> Definition of every reason that userspace can come up with will add extra >> uAPI maintenance cost/issues with no practical benefits. Values are not >> going to be used for anything outside reporting a drop reason and subsystem >> offset is not part of uAPI anyway. > > Ah, I see. No, please don't stuff user space defined values into > the drop reason. The reasons are for debugging the kernel stack > itself. IOW it'd be abuse not reuse. Makes sense. I wasn't sure that's a good solution from a kernel perspective either. It's better than defining all these reasons, IMO, but it's not good enough to be considered acceptable, I agree. How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? One for an explicit drop action with a zero argument and one for an explicit drop with non-zero argument. The exact reason for the error can be retrieved by other means, i.e by looking at the datapath flow dump or OVS logs/traces. This way we can give a user who is catching packet drop traces a signal that there was something wrong with an OVS flow and they can look up exact details from the userspace / flow dump. The point being, most of the flows will have a zero as a drop action argument, i.e. a regular explicit packet drop. It will be hard to figure out which flow exactly we're hitting without looking at the full flow dump. And if the value is non-zero, then it should be immediately obvious which flow is to blame from the dump, as we should not have a lot of such flows. This would still allow us to avoid a maintenance burden of defining every case, which are fairly meaningless for the kernel itself, while having 99% of the information we may need. Jakub, do you think this will be acceptable? Eric, Adrian, Aaron, do you see any problems with such implementation? P.S. There is a plan to add more drop reasons for other places in openvswitch module to catch more regular types of drops like memory issues or upcall failures. So, the drop reason subsystem can be extended later. The explicit drop action is a bit of an odd case here. Best regards, Ilya Maximets.
On Mon, 10 Jul 2023 18:51:19 +0200 Ilya Maximets wrote: > Makes sense. I wasn't sure that's a good solution from a kernel perspective > either. It's better than defining all these reasons, IMO, but it's not good > enough to be considered acceptable, I agree. > > How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and > OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? > One for an explicit drop action with a zero argument and one for an explicit > drop with non-zero argument. > > The exact reason for the error can be retrieved by other means, i.e by looking > at the datapath flow dump or OVS logs/traces. > > This way we can give a user who is catching packet drop traces a signal that > there was something wrong with an OVS flow and they can look up exact details > from the userspace / flow dump. > > The point being, most of the flows will have a zero as a drop action argument, > i.e. a regular explicit packet drop. It will be hard to figure out which flow > exactly we're hitting without looking at the full flow dump. And if the value > is non-zero, then it should be immediately obvious which flow is to blame from > the dump, as we should not have a lot of such flows. > > This would still allow us to avoid a maintenance burden of defining every case, > which are fairly meaningless for the kernel itself, while having 99% of the > information we may need. > > Jakub, do you think this will be acceptable? As far as I understand what you're proposing, yes :) > Eric, Adrian, Aaron, do you see any problems with such implementation? > > P.S. There is a plan to add more drop reasons for other places in openvswitch > module to catch more regular types of drops like memory issues or upcall > failures. So, the drop reason subsystem can be extended later. > The explicit drop action is a bit of an odd case here. If you have more than ~4 OvS specific reasons, I wonder if it still makes sense to create a reason group/subsystem for OvS (a'la WiFi)?
On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote: > On 7/8/23 00:06, Jakub Kicinski wrote: > > On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote: > >>>> That already exists, right? Johannes added it in the last release for WiFi. > >>> > >>> I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly > >>> to that on a surface. However, looking closer, any value that can be passed > >>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is > >>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very > >>> possible, because I don't really know wifi code). > >>> > >>> The difference, I guess, is that for openvswitch values will be provided by > >>> the userpsace application via netlink interface. It'll be just a number not > >>> defined anywhere in the kernel. Only the subsystem itself will be defined > >>> in order to occupy the range. Garbage in, same garbage out, from the kernel's > >>> perspective. > >> > >> To be clear, I think, not defining them in this particular case is better. > >> Definition of every reason that userspace can come up with will add extra > >> uAPI maintenance cost/issues with no practical benefits. Values are not > >> going to be used for anything outside reporting a drop reason and subsystem > >> offset is not part of uAPI anyway. > > > > Ah, I see. No, please don't stuff user space defined values into > > the drop reason. The reasons are for debugging the kernel stack > > itself. IOW it'd be abuse not reuse. > > Makes sense. I wasn't sure that's a good solution from a kernel perspective > either. It's better than defining all these reasons, IMO, but it's not good > enough to be considered acceptable, I agree. > > How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and > OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? > One for an explicit drop action with a zero argument and one for an explicit > drop with non-zero argument. > > The exact reason for the error can be retrieved by other means, i.e by looking > at the datapath flow dump or OVS logs/traces. > > This way we can give a user who is catching packet drop traces a signal that > there was something wrong with an OVS flow and they can look up exact details > from the userspace / flow dump. > > The point being, most of the flows will have a zero as a drop action argument, > i.e. a regular explicit packet drop. It will be hard to figure out which flow > exactly we're hitting without looking at the full flow dump. And if the value > is non-zero, then it should be immediately obvious which flow is to blame from > the dump, as we should not have a lot of such flows. > > This would still allow us to avoid a maintenance burden of defining every case, > which are fairly meaningless for the kernel itself, while having 99% of the > information we may need. > > Jakub, do you think this will be acceptable? > > Eric, Adrian, Aaron, do you see any problems with such implementation? I see no problems. I'm content with this approach. > P.S. There is a plan to add more drop reasons for other places in openvswitch > module to catch more regular types of drops like memory issues or upcall > failures. So, the drop reason subsystem can be extended later. > The explicit drop action is a bit of an odd case here. > > Best regards, Ilya Maximets. >
On 7/10/23 19:01, Jakub Kicinski wrote: > On Mon, 10 Jul 2023 18:51:19 +0200 Ilya Maximets wrote: >> Makes sense. I wasn't sure that's a good solution from a kernel perspective >> either. It's better than defining all these reasons, IMO, but it's not good >> enough to be considered acceptable, I agree. >> >> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and >> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? >> One for an explicit drop action with a zero argument and one for an explicit >> drop with non-zero argument. >> >> The exact reason for the error can be retrieved by other means, i.e by looking >> at the datapath flow dump or OVS logs/traces. >> >> This way we can give a user who is catching packet drop traces a signal that >> there was something wrong with an OVS flow and they can look up exact details >> from the userspace / flow dump. >> >> The point being, most of the flows will have a zero as a drop action argument, >> i.e. a regular explicit packet drop. It will be hard to figure out which flow >> exactly we're hitting without looking at the full flow dump. And if the value >> is non-zero, then it should be immediately obvious which flow is to blame from >> the dump, as we should not have a lot of such flows. >> >> This would still allow us to avoid a maintenance burden of defining every case, >> which are fairly meaningless for the kernel itself, while having 99% of the >> information we may need. >> >> Jakub, do you think this will be acceptable? > > As far as I understand what you're proposing, yes :) OK. Just to spell it all out: Userspace will install a flow with an OVS_FLOW_CMD_NEW: match:ip,tcp,... actions:something,something,drop(0) match:ip,udp,... actions:something,something,drop(42) drop() here represents the OVS_ACTION_ATTR_DROP. Then, in net/openvswitch/actions.c:do_execute_actions(), while executing these actions: case OVS_ACTION_ATTR_DROP: kfree_skb_reason(skb, nla_get_u32(a) ? OVS_DROP_ACTION_WITH_ERROR : OVS_DROP_ACTION); Users can enable traces and catch the OVS_DROP_ACTION_WITH_ERROR. Later they can dump flows with OVS_FLOW_CMD_GET and see that the error value was 42. > >> Eric, Adrian, Aaron, do you see any problems with such implementation? >> >> P.S. There is a plan to add more drop reasons for other places in openvswitch >> module to catch more regular types of drops like memory issues or upcall >> failures. So, the drop reason subsystem can be extended later. >> The explicit drop action is a bit of an odd case here. > > If you have more than ~4 OvS specific reasons, I wonder if it still > makes sense to create a reason group/subsystem for OvS (a'la WiFi)? I believe, we will easily have more than 4 OVS-specific reasons. A few from the top of my head: - upcall failure (failed to send a packet to userspace) - reached the limit for deferred actions - reached the recursion limit So, creation of a reason group/subsystem seems reasonable to me. Best regards, Ilya Maximets.
On Mon, 10 Jul 2023 20:39:11 +0200 Ilya Maximets wrote: > > As far as I understand what you're proposing, yes :) > > OK. Just to spell it all out: > > Userspace will install a flow with an OVS_FLOW_CMD_NEW: > > match:ip,tcp,... actions:something,something,drop(0) > match:ip,udp,... actions:something,something,drop(42) > > drop() here represents the OVS_ACTION_ATTR_DROP. > > Then, in net/openvswitch/actions.c:do_execute_actions(), while executing > these actions: > > case OVS_ACTION_ATTR_DROP: > kfree_skb_reason(skb, nla_get_u32(a) ? OVS_DROP_ACTION_WITH_ERROR > : OVS_DROP_ACTION); > > Users can enable traces and catch the OVS_DROP_ACTION_WITH_ERROR. > Later they can dump flows with OVS_FLOW_CMD_GET and see that the > error value was 42. nod > >> Eric, Adrian, Aaron, do you see any problems with such implementation? > >> > >> P.S. There is a plan to add more drop reasons for other places in openvswitch > >> module to catch more regular types of drops like memory issues or upcall > >> failures. So, the drop reason subsystem can be extended later. > >> The explicit drop action is a bit of an odd case here. > > > > If you have more than ~4 OvS specific reasons, I wonder if it still > > makes sense to create a reason group/subsystem for OvS (a'la WiFi)? > > I believe, we will easily have more than 4 OVS-specific reasons. A few > from the top of my head: > - upcall failure (failed to send a packet to userspace) > - reached the limit for deferred actions > - reached the recursion limit > > So, creation of a reason group/subsystem seems reasonable to me. SG.
Eric Garver <eric@garver.life> writes: > On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote: >> On 7/8/23 00:06, Jakub Kicinski wrote: >> > On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote: >> >>>> That already exists, right? Johannes added it in the last release for WiFi. >> >>> >> >>> I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly >> >>> to that on a surface. However, looking closer, any value that can be passed >> >>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is >> >>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very >> >>> possible, because I don't really know wifi code). >> >>> >> >>> The difference, I guess, is that for openvswitch values will be provided by >> >>> the userpsace application via netlink interface. It'll be just a number not >> >>> defined anywhere in the kernel. Only the subsystem itself will be defined >> >>> in order to occupy the range. Garbage in, same garbage out, from the kernel's >> >>> perspective. >> >> >> >> To be clear, I think, not defining them in this particular case is better. >> >> Definition of every reason that userspace can come up with will add extra >> >> uAPI maintenance cost/issues with no practical benefits. Values are not >> >> going to be used for anything outside reporting a drop reason and subsystem >> >> offset is not part of uAPI anyway. >> > >> > Ah, I see. No, please don't stuff user space defined values into >> > the drop reason. The reasons are for debugging the kernel stack >> > itself. IOW it'd be abuse not reuse. >> >> Makes sense. I wasn't sure that's a good solution from a kernel perspective >> either. It's better than defining all these reasons, IMO, but it's not good >> enough to be considered acceptable, I agree. >> >> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and >> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? >> One for an explicit drop action with a zero argument and one for an explicit >> drop with non-zero argument. >> >> The exact reason for the error can be retrieved by other means, i.e by looking >> at the datapath flow dump or OVS logs/traces. >> >> This way we can give a user who is catching packet drop traces a signal that >> there was something wrong with an OVS flow and they can look up exact details >> from the userspace / flow dump. >> >> The point being, most of the flows will have a zero as a drop action argument, >> i.e. a regular explicit packet drop. It will be hard to figure out which flow >> exactly we're hitting without looking at the full flow dump. And if the value >> is non-zero, then it should be immediately obvious which flow is to blame from >> the dump, as we should not have a lot of such flows. >> >> This would still allow us to avoid a maintenance burden of defining every case, >> which are fairly meaningless for the kernel itself, while having 99% of the >> information we may need. >> >> Jakub, do you think this will be acceptable? >> >> Eric, Adrian, Aaron, do you see any problems with such implementation? > > I see no problems. I'm content with this approach. +1 >> P.S. There is a plan to add more drop reasons for other places in openvswitch >> module to catch more regular types of drops like memory issues or upcall >> failures. So, the drop reason subsystem can be extended later. >> The explicit drop action is a bit of an odd case here. >> >> Best regards, Ilya Maximets. >>
On 7/11/23 22:46, Aaron Conole wrote: > Eric Garver <eric@garver.life> writes: > >> On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote: >>> On 7/8/23 00:06, Jakub Kicinski wrote: >>>> On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote: >>>>>>> That already exists, right? Johannes added it in the last release for WiFi. >>>>>> >>>>>> I'm not sure. The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly >>>>>> to that on a surface. However, looking closer, any value that can be passed >>>>>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is >>>>>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very >>>>>> possible, because I don't really know wifi code). >>>>>> >>>>>> The difference, I guess, is that for openvswitch values will be provided by >>>>>> the userpsace application via netlink interface. It'll be just a number not >>>>>> defined anywhere in the kernel. Only the subsystem itself will be defined >>>>>> in order to occupy the range. Garbage in, same garbage out, from the kernel's >>>>>> perspective. >>>>> >>>>> To be clear, I think, not defining them in this particular case is better. >>>>> Definition of every reason that userspace can come up with will add extra >>>>> uAPI maintenance cost/issues with no practical benefits. Values are not >>>>> going to be used for anything outside reporting a drop reason and subsystem >>>>> offset is not part of uAPI anyway. >>>> >>>> Ah, I see. No, please don't stuff user space defined values into >>>> the drop reason. The reasons are for debugging the kernel stack >>>> itself. IOW it'd be abuse not reuse. >>> >>> Makes sense. I wasn't sure that's a good solution from a kernel perspective >>> either. It's better than defining all these reasons, IMO, but it's not good >>> enough to be considered acceptable, I agree. >>> >>> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and >>> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ? >>> One for an explicit drop action with a zero argument and one for an explicit >>> drop with non-zero argument. >>> >>> The exact reason for the error can be retrieved by other means, i.e by looking >>> at the datapath flow dump or OVS logs/traces. >>> >>> This way we can give a user who is catching packet drop traces a signal that >>> there was something wrong with an OVS flow and they can look up exact details >>> from the userspace / flow dump. >>> >>> The point being, most of the flows will have a zero as a drop action argument, >>> i.e. a regular explicit packet drop. It will be hard to figure out which flow >>> exactly we're hitting without looking at the full flow dump. And if the value >>> is non-zero, then it should be immediately obvious which flow is to blame from >>> the dump, as we should not have a lot of such flows. >>> >>> This would still allow us to avoid a maintenance burden of defining every case, >>> which are fairly meaningless for the kernel itself, while having 99% of the >>> information we may need. >>> >>> Jakub, do you think this will be acceptable? >>> >>> Eric, Adrian, Aaron, do you see any problems with such implementation? >> >> I see no problems. I'm content with this approach. > > +1 Sounds like a good plan. Thanks. > >>> P.S. There is a plan to add more drop reasons for other places in openvswitch >>> module to catch more regular types of drops like memory issues or upcall >>> failures. So, the drop reason subsystem can be extended later. >>> The explicit drop action is a bit of an odd case here. >>> >>> Best regards, Ilya Maximets. >>> >
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index e94870e77ee9..a967dbca3574 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -965,6 +965,7 @@ struct check_pkt_len_arg { * start of the packet or at the start of the l3 header depending on the value * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. + * @OVS_ACTION_ATTR_DROP: Explicit drop action. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1002,6 +1003,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ + OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index cab1e02b63e0..4ad9a45dc042 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -32,6 +32,7 @@ #include "vport.h" #include "flow_netlink.h" #include "openvswitch_trace.h" +#include "drop.h" struct deferred_action { struct sk_buff *skb; @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return dec_ttl_exception_handler(dp, skb, key, a); break; + + case OVS_ACTION_ATTR_DROP: + u32 reason = nla_get_u32(a); + + reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH << + SKB_DROP_REASON_SUBSYS_SHIFT; + + if (reason == OVS_XLATE_OK) + break; + + kfree_skb_reason(skb, reason); + return 0; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 41116361433d..23d39eae9a0d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -39,6 +39,7 @@ #include <net/erspan.h> #include "flow_netlink.h" +#include "drop.h" struct ovs_len_tbl { int len; @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: + case OVS_ACTION_ATTR_DROP: break; case OVS_ACTION_ATTR_CT: @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) /* Whenever new actions are added, the need to update this * function should be considered. */ - BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23); + BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); if (!actions) return; @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, + [OVS_ACTION_ATTR_DROP] = sizeof(u32), }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_DROP: + if (nla_get_u32(a) >= + u32_get_bits(OVS_XLATE_MAX, + ~SKB_DROP_REASON_SUBSYS_MASK)) + return -EINVAL; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1c8b36bc15d4..526ebad7d514 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -115,6 +115,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), + ("OVS_ACTION_ATTR_DROP", "uint32"), ) class ctact(nla): @@ -261,6 +262,8 @@ class ovsactions(nla): print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) elif field[0] == "OVS_ACTION_ATTR_TRUNC": print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop" elif field[1] == "flag": if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": print_str += "ct_clear"
This adds an explicit drop action. This is used by OVS to drop packets for which it cannot determine what to do. An explicit action in the kernel allows passing the reason _why_ the packet is being dropped. We can then use perf tracing to match on the drop reason. e.g. trace all OVS dropped skbs # perf trace -e skb:kfree_skb --filter="reason >= 0x30000" [..] 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \ location:0xffffffffc0d9b462, protocol: 2048, reason: 196610) reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP) Signed-off-by: Eric Garver <eric@garver.life> --- include/uapi/linux/openvswitch.h | 2 ++ net/openvswitch/actions.c | 13 +++++++++++++ net/openvswitch/flow_netlink.c | 12 +++++++++++- .../testing/selftests/net/openvswitch/ovs-dpctl.py | 3 +++ 4 files changed, 29 insertions(+), 1 deletion(-)