diff mbox series

[net-next,2/2] net: openvswitch: add drop action

Message ID 20230629203005.2137107-3-eric@garver.life (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: openvswitch: add drop action | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 8 this patch: 15
netdev/cc_maintainers warning 6 maintainers not CCed: kuba@kernel.org shuah@kernel.org davem@davemloft.net linux-kselftest@vger.kernel.org pabeni@redhat.com edumazet@google.com
netdev/build_clang fail Errors and warnings before: 18 this patch: 26
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 8 this patch: 15
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 96 lines checked
netdev/kdoc success Errors and warnings before: 42 this patch: 42
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Garver June 29, 2023, 8:30 p.m. UTC
This adds an explicit drop action. This is used by OVS to drop packets
for which it cannot determine what to do. An explicit action in the
kernel allows passing the reason _why_ the packet is being dropped. We
can then use perf tracing to match on the drop reason.

e.g. trace all OVS dropped skbs

 # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
 [..]
 106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
  location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)

reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)

Signed-off-by: Eric Garver <eric@garver.life>
---
 include/uapi/linux/openvswitch.h                    |  2 ++
 net/openvswitch/actions.c                           | 13 +++++++++++++
 net/openvswitch/flow_netlink.c                      | 12 +++++++++++-
 .../testing/selftests/net/openvswitch/ovs-dpctl.py  |  3 +++
 4 files changed, 29 insertions(+), 1 deletion(-)

Comments

kernel test robot June 29, 2023, 10:46 p.m. UTC | #1
Hi Eric,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Eric-Garver/net-openvswitch-add-drop-reasons/20230630-043307
base:   net-next/main
patch link:    https://lore.kernel.org/r/20230629203005.2137107-3-eric%40garver.life
patch subject: [PATCH net-next 2/2] net: openvswitch: add drop action
config: mips-randconfig-r036-20230629 (https://download.01.org/0day-ci/archive/20230630/202306300645.towwST4X-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce: (https://download.01.org/0day-ci/archive/20230630/202306300645.towwST4X-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306300645.towwST4X-lkp@intel.com/

All errors (new ones prefixed by >>):

>> net/openvswitch/actions.c:1483:4: error: expected expression
                           u32 reason = nla_get_u32(a);
                           ^
>> net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason'
                           reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
                           ^
   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
                           if (reason == OVS_XLATE_OK)
                               ^
   net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason'
                           kfree_skb_reason(skb, reason);
                                                 ^
   4 errors generated.


vim +1483 net/openvswitch/actions.c

  1285	
  1286	/* Execute a list of actions against 'skb'. */
  1287	static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
  1288				      struct sw_flow_key *key,
  1289				      const struct nlattr *attr, int len)
  1290	{
  1291		const struct nlattr *a;
  1292		int rem;
  1293	
  1294		for (a = attr, rem = len; rem > 0;
  1295		     a = nla_next(a, &rem)) {
  1296			int err = 0;
  1297	
  1298			if (trace_ovs_do_execute_action_enabled())
  1299				trace_ovs_do_execute_action(dp, skb, key, a, rem);
  1300	
  1301			switch (nla_type(a)) {
  1302			case OVS_ACTION_ATTR_OUTPUT: {
  1303				int port = nla_get_u32(a);
  1304				struct sk_buff *clone;
  1305	
  1306				/* Every output action needs a separate clone
  1307				 * of 'skb', In case the output action is the
  1308				 * last action, cloning can be avoided.
  1309				 */
  1310				if (nla_is_last(a, rem)) {
  1311					do_output(dp, skb, port, key);
  1312					/* 'skb' has been used for output.
  1313					 */
  1314					return 0;
  1315				}
  1316	
  1317				clone = skb_clone(skb, GFP_ATOMIC);
  1318				if (clone)
  1319					do_output(dp, clone, port, key);
  1320				OVS_CB(skb)->cutlen = 0;
  1321				break;
  1322			}
  1323	
  1324			case OVS_ACTION_ATTR_TRUNC: {
  1325				struct ovs_action_trunc *trunc = nla_data(a);
  1326	
  1327				if (skb->len > trunc->max_len)
  1328					OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
  1329				break;
  1330			}
  1331	
  1332			case OVS_ACTION_ATTR_USERSPACE:
  1333				output_userspace(dp, skb, key, a, attr,
  1334							     len, OVS_CB(skb)->cutlen);
  1335				OVS_CB(skb)->cutlen = 0;
  1336				break;
  1337	
  1338			case OVS_ACTION_ATTR_HASH:
  1339				execute_hash(skb, key, a);
  1340				break;
  1341	
  1342			case OVS_ACTION_ATTR_PUSH_MPLS: {
  1343				struct ovs_action_push_mpls *mpls = nla_data(a);
  1344	
  1345				err = push_mpls(skb, key, mpls->mpls_lse,
  1346						mpls->mpls_ethertype, skb->mac_len);
  1347				break;
  1348			}
  1349			case OVS_ACTION_ATTR_ADD_MPLS: {
  1350				struct ovs_action_add_mpls *mpls = nla_data(a);
  1351				__u16 mac_len = 0;
  1352	
  1353				if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
  1354					mac_len = skb->mac_len;
  1355	
  1356				err = push_mpls(skb, key, mpls->mpls_lse,
  1357						mpls->mpls_ethertype, mac_len);
  1358				break;
  1359			}
  1360			case OVS_ACTION_ATTR_POP_MPLS:
  1361				err = pop_mpls(skb, key, nla_get_be16(a));
  1362				break;
  1363	
  1364			case OVS_ACTION_ATTR_PUSH_VLAN:
  1365				err = push_vlan(skb, key, nla_data(a));
  1366				break;
  1367	
  1368			case OVS_ACTION_ATTR_POP_VLAN:
  1369				err = pop_vlan(skb, key);
  1370				break;
  1371	
  1372			case OVS_ACTION_ATTR_RECIRC: {
  1373				bool last = nla_is_last(a, rem);
  1374	
  1375				err = execute_recirc(dp, skb, key, a, last);
  1376				if (last) {
  1377					/* If this is the last action, the skb has
  1378					 * been consumed or freed.
  1379					 * Return immediately.
  1380					 */
  1381					return err;
  1382				}
  1383				break;
  1384			}
  1385	
  1386			case OVS_ACTION_ATTR_SET:
  1387				err = execute_set_action(skb, key, nla_data(a));
  1388				break;
  1389	
  1390			case OVS_ACTION_ATTR_SET_MASKED:
  1391			case OVS_ACTION_ATTR_SET_TO_MASKED:
  1392				err = execute_masked_set_action(skb, key, nla_data(a));
  1393				break;
  1394	
  1395			case OVS_ACTION_ATTR_SAMPLE: {
  1396				bool last = nla_is_last(a, rem);
  1397	
  1398				err = sample(dp, skb, key, a, last);
  1399				if (last)
  1400					return err;
  1401	
  1402				break;
  1403			}
  1404	
  1405			case OVS_ACTION_ATTR_CT:
  1406				if (!is_flow_key_valid(key)) {
  1407					err = ovs_flow_key_update(skb, key);
  1408					if (err)
  1409						return err;
  1410				}
  1411	
  1412				err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
  1413						     nla_data(a));
  1414	
  1415				/* Hide stolen IP fragments from user space. */
  1416				if (err)
  1417					return err == -EINPROGRESS ? 0 : err;
  1418				break;
  1419	
  1420			case OVS_ACTION_ATTR_CT_CLEAR:
  1421				err = ovs_ct_clear(skb, key);
  1422				break;
  1423	
  1424			case OVS_ACTION_ATTR_PUSH_ETH:
  1425				err = push_eth(skb, key, nla_data(a));
  1426				break;
  1427	
  1428			case OVS_ACTION_ATTR_POP_ETH:
  1429				err = pop_eth(skb, key);
  1430				break;
  1431	
  1432			case OVS_ACTION_ATTR_PUSH_NSH: {
  1433				u8 buffer[NSH_HDR_MAX_LEN];
  1434				struct nshhdr *nh = (struct nshhdr *)buffer;
  1435	
  1436				err = nsh_hdr_from_nlattr(nla_data(a), nh,
  1437							  NSH_HDR_MAX_LEN);
  1438				if (unlikely(err))
  1439					break;
  1440				err = push_nsh(skb, key, nh);
  1441				break;
  1442			}
  1443	
  1444			case OVS_ACTION_ATTR_POP_NSH:
  1445				err = pop_nsh(skb, key);
  1446				break;
  1447	
  1448			case OVS_ACTION_ATTR_METER:
  1449				if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
  1450					consume_skb(skb);
  1451					return 0;
  1452				}
  1453				break;
  1454	
  1455			case OVS_ACTION_ATTR_CLONE: {
  1456				bool last = nla_is_last(a, rem);
  1457	
  1458				err = clone(dp, skb, key, a, last);
  1459				if (last)
  1460					return err;
  1461	
  1462				break;
  1463			}
  1464	
  1465			case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
  1466				bool last = nla_is_last(a, rem);
  1467	
  1468				err = execute_check_pkt_len(dp, skb, key, a, last);
  1469				if (last)
  1470					return err;
  1471	
  1472				break;
  1473			}
  1474	
  1475			case OVS_ACTION_ATTR_DEC_TTL:
  1476				err = execute_dec_ttl(skb, key);
  1477				if (err == -EHOSTUNREACH)
  1478					return dec_ttl_exception_handler(dp, skb,
  1479									 key, a);
  1480				break;
  1481	
  1482			case OVS_ACTION_ATTR_DROP:
> 1483				u32 reason = nla_get_u32(a);
  1484	
> 1485				reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
  1486						SKB_DROP_REASON_SUBSYS_SHIFT;
  1487	
  1488				if (reason == OVS_XLATE_OK)
  1489					break;
  1490	
  1491				kfree_skb_reason(skb, reason);
  1492				return 0;
  1493			}
  1494	
  1495			if (unlikely(err)) {
  1496				kfree_skb(skb);
  1497				return err;
  1498			}
  1499		}
  1500	
  1501		consume_skb(skb);
  1502		return 0;
  1503	}
  1504
kernel test robot June 29, 2023, 10:56 p.m. UTC | #2
Hi Eric,

kernel test robot noticed the following build errors:

[auto build test ERROR on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Eric-Garver/net-openvswitch-add-drop-reasons/20230630-043307
base:   net-next/main
patch link:    https://lore.kernel.org/r/20230629203005.2137107-3-eric%40garver.life
patch subject: [PATCH net-next 2/2] net: openvswitch: add drop action
config: hexagon-randconfig-r045-20230629 (https://download.01.org/0day-ci/archive/20230630/202306300609.tdRdZscy-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce: (https://download.01.org/0day-ci/archive/20230630/202306300609.tdRdZscy-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306300609.tdRdZscy-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from net/openvswitch/actions.c:8:
   In file included from include/linux/skbuff.h:17:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
         |                                                   ^
   In file included from net/openvswitch/actions.c:8:
   In file included from include/linux/skbuff.h:17:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
         |                                                   ^
   In file included from net/openvswitch/actions.c:8:
   In file included from include/linux/skbuff.h:17:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/hexagon/include/asm/io.h:334:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
>> net/openvswitch/actions.c:1483:4: error: expected expression
    1483 |                         u32 reason = nla_get_u32(a);
         |                         ^
>> net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason'
    1485 |                         reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
         |                         ^
   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
    1488 |                         if (reason == OVS_XLATE_OK)
         |                             ^
   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
   net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason'
    1491 |                         kfree_skb_reason(skb, reason);
         |                                               ^
   6 warnings and 6 errors generated.


vim +1483 net/openvswitch/actions.c

  1285	
  1286	/* Execute a list of actions against 'skb'. */
  1287	static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
  1288				      struct sw_flow_key *key,
  1289				      const struct nlattr *attr, int len)
  1290	{
  1291		const struct nlattr *a;
  1292		int rem;
  1293	
  1294		for (a = attr, rem = len; rem > 0;
  1295		     a = nla_next(a, &rem)) {
  1296			int err = 0;
  1297	
  1298			if (trace_ovs_do_execute_action_enabled())
  1299				trace_ovs_do_execute_action(dp, skb, key, a, rem);
  1300	
  1301			switch (nla_type(a)) {
  1302			case OVS_ACTION_ATTR_OUTPUT: {
  1303				int port = nla_get_u32(a);
  1304				struct sk_buff *clone;
  1305	
  1306				/* Every output action needs a separate clone
  1307				 * of 'skb', In case the output action is the
  1308				 * last action, cloning can be avoided.
  1309				 */
  1310				if (nla_is_last(a, rem)) {
  1311					do_output(dp, skb, port, key);
  1312					/* 'skb' has been used for output.
  1313					 */
  1314					return 0;
  1315				}
  1316	
  1317				clone = skb_clone(skb, GFP_ATOMIC);
  1318				if (clone)
  1319					do_output(dp, clone, port, key);
  1320				OVS_CB(skb)->cutlen = 0;
  1321				break;
  1322			}
  1323	
  1324			case OVS_ACTION_ATTR_TRUNC: {
  1325				struct ovs_action_trunc *trunc = nla_data(a);
  1326	
  1327				if (skb->len > trunc->max_len)
  1328					OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
  1329				break;
  1330			}
  1331	
  1332			case OVS_ACTION_ATTR_USERSPACE:
  1333				output_userspace(dp, skb, key, a, attr,
  1334							     len, OVS_CB(skb)->cutlen);
  1335				OVS_CB(skb)->cutlen = 0;
  1336				break;
  1337	
  1338			case OVS_ACTION_ATTR_HASH:
  1339				execute_hash(skb, key, a);
  1340				break;
  1341	
  1342			case OVS_ACTION_ATTR_PUSH_MPLS: {
  1343				struct ovs_action_push_mpls *mpls = nla_data(a);
  1344	
  1345				err = push_mpls(skb, key, mpls->mpls_lse,
  1346						mpls->mpls_ethertype, skb->mac_len);
  1347				break;
  1348			}
  1349			case OVS_ACTION_ATTR_ADD_MPLS: {
  1350				struct ovs_action_add_mpls *mpls = nla_data(a);
  1351				__u16 mac_len = 0;
  1352	
  1353				if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
  1354					mac_len = skb->mac_len;
  1355	
  1356				err = push_mpls(skb, key, mpls->mpls_lse,
  1357						mpls->mpls_ethertype, mac_len);
  1358				break;
  1359			}
  1360			case OVS_ACTION_ATTR_POP_MPLS:
  1361				err = pop_mpls(skb, key, nla_get_be16(a));
  1362				break;
  1363	
  1364			case OVS_ACTION_ATTR_PUSH_VLAN:
  1365				err = push_vlan(skb, key, nla_data(a));
  1366				break;
  1367	
  1368			case OVS_ACTION_ATTR_POP_VLAN:
  1369				err = pop_vlan(skb, key);
  1370				break;
  1371	
  1372			case OVS_ACTION_ATTR_RECIRC: {
  1373				bool last = nla_is_last(a, rem);
  1374	
  1375				err = execute_recirc(dp, skb, key, a, last);
  1376				if (last) {
  1377					/* If this is the last action, the skb has
  1378					 * been consumed or freed.
  1379					 * Return immediately.
  1380					 */
  1381					return err;
  1382				}
  1383				break;
  1384			}
  1385	
  1386			case OVS_ACTION_ATTR_SET:
  1387				err = execute_set_action(skb, key, nla_data(a));
  1388				break;
  1389	
  1390			case OVS_ACTION_ATTR_SET_MASKED:
  1391			case OVS_ACTION_ATTR_SET_TO_MASKED:
  1392				err = execute_masked_set_action(skb, key, nla_data(a));
  1393				break;
  1394	
  1395			case OVS_ACTION_ATTR_SAMPLE: {
  1396				bool last = nla_is_last(a, rem);
  1397	
  1398				err = sample(dp, skb, key, a, last);
  1399				if (last)
  1400					return err;
  1401	
  1402				break;
  1403			}
  1404	
  1405			case OVS_ACTION_ATTR_CT:
  1406				if (!is_flow_key_valid(key)) {
  1407					err = ovs_flow_key_update(skb, key);
  1408					if (err)
  1409						return err;
  1410				}
  1411	
  1412				err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
  1413						     nla_data(a));
  1414	
  1415				/* Hide stolen IP fragments from user space. */
  1416				if (err)
  1417					return err == -EINPROGRESS ? 0 : err;
  1418				break;
  1419	
  1420			case OVS_ACTION_ATTR_CT_CLEAR:
  1421				err = ovs_ct_clear(skb, key);
  1422				break;
  1423	
  1424			case OVS_ACTION_ATTR_PUSH_ETH:
  1425				err = push_eth(skb, key, nla_data(a));
  1426				break;
  1427	
  1428			case OVS_ACTION_ATTR_POP_ETH:
  1429				err = pop_eth(skb, key);
  1430				break;
  1431	
  1432			case OVS_ACTION_ATTR_PUSH_NSH: {
  1433				u8 buffer[NSH_HDR_MAX_LEN];
  1434				struct nshhdr *nh = (struct nshhdr *)buffer;
  1435	
  1436				err = nsh_hdr_from_nlattr(nla_data(a), nh,
  1437							  NSH_HDR_MAX_LEN);
  1438				if (unlikely(err))
  1439					break;
  1440				err = push_nsh(skb, key, nh);
  1441				break;
  1442			}
  1443	
  1444			case OVS_ACTION_ATTR_POP_NSH:
  1445				err = pop_nsh(skb, key);
  1446				break;
  1447	
  1448			case OVS_ACTION_ATTR_METER:
  1449				if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
  1450					consume_skb(skb);
  1451					return 0;
  1452				}
  1453				break;
  1454	
  1455			case OVS_ACTION_ATTR_CLONE: {
  1456				bool last = nla_is_last(a, rem);
  1457	
  1458				err = clone(dp, skb, key, a, last);
  1459				if (last)
  1460					return err;
  1461	
  1462				break;
  1463			}
  1464	
  1465			case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
  1466				bool last = nla_is_last(a, rem);
  1467	
  1468				err = execute_check_pkt_len(dp, skb, key, a, last);
  1469				if (last)
  1470					return err;
  1471	
  1472				break;
  1473			}
  1474	
  1475			case OVS_ACTION_ATTR_DEC_TTL:
  1476				err = execute_dec_ttl(skb, key);
  1477				if (err == -EHOSTUNREACH)
  1478					return dec_ttl_exception_handler(dp, skb,
  1479									 key, a);
  1480				break;
  1481	
  1482			case OVS_ACTION_ATTR_DROP:
> 1483				u32 reason = nla_get_u32(a);
  1484	
> 1485				reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
  1486						SKB_DROP_REASON_SUBSYS_SHIFT;
  1487	
  1488				if (reason == OVS_XLATE_OK)
  1489					break;
  1490	
  1491				kfree_skb_reason(skb, reason);
  1492				return 0;
  1493			}
  1494	
  1495			if (unlikely(err)) {
  1496				kfree_skb(skb);
  1497				return err;
  1498			}
  1499		}
  1500	
  1501		consume_skb(skb);
  1502		return 0;
  1503	}
  1504
Simon Horman June 30, 2023, 9:47 a.m. UTC | #3
On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote:
> This adds an explicit drop action. This is used by OVS to drop packets
> for which it cannot determine what to do. An explicit action in the
> kernel allows passing the reason _why_ the packet is being dropped. We
> can then use perf tracing to match on the drop reason.
> 
> e.g. trace all OVS dropped skbs
> 
>  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
>  [..]
>  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
>   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
> 
> reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
> 
> Signed-off-by: Eric Garver <eric@garver.life>

...

> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -32,6 +32,7 @@
>  #include "vport.h"
>  #include "flow_netlink.h"
>  #include "openvswitch_trace.h"
> +#include "drop.h"
>  
>  struct deferred_action {
>  	struct sk_buff *skb;
> @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>  				return dec_ttl_exception_handler(dp, skb,
>  								 key, a);
>  			break;
> +
> +		case OVS_ACTION_ATTR_DROP:
> +			u32 reason = nla_get_u32(a);
> +
> +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> +					SKB_DROP_REASON_SUBSYS_SHIFT;
> +
> +			if (reason == OVS_XLATE_OK)
> +				break;
> +
> +			kfree_skb_reason(skb, reason);
> +			return 0;
>  		}

Hi Eric,

thanks for your patches. This is an interesting new feature.

unfortunately clang-16 doesn't seem to like this very much.
I think that it is due to the declaration of reason not
being enclosed in a block - { }.

  net/openvswitch/actions.c:1483:4: error: expected expression
                          u32 reason = nla_get_u32(a);
                          ^
  net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason'
                          reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
                          ^
  net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
                          if (reason == OVS_XLATE_OK)
                              ^
  net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason'
                          kfree_skb_reason(skb, reason);
                                                ^
  4 errors generated.


net-next is currently closed.
So please provide a v2 once it reposts, after 10th July.

See: https://www.kernel.org/doc/html/next/process/maintainer-netdev.html#development-cycle
Eric Garver June 30, 2023, 12:29 p.m. UTC | #4
On Fri, Jun 30, 2023 at 11:47:04AM +0200, Simon Horman wrote:
> On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote:
> > This adds an explicit drop action. This is used by OVS to drop packets
> > for which it cannot determine what to do. An explicit action in the
> > kernel allows passing the reason _why_ the packet is being dropped. We
> > can then use perf tracing to match on the drop reason.
> > 
> > e.g. trace all OVS dropped skbs
> > 
> >  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
> >  [..]
> >  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
> >   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
> > 
> > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
> > 
> > Signed-off-by: Eric Garver <eric@garver.life>
> 
> ...
> 
> > --- a/net/openvswitch/actions.c
> > +++ b/net/openvswitch/actions.c
> > @@ -32,6 +32,7 @@
> >  #include "vport.h"
> >  #include "flow_netlink.h"
> >  #include "openvswitch_trace.h"
> > +#include "drop.h"
> >  
> >  struct deferred_action {
> >  	struct sk_buff *skb;
> > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> >  				return dec_ttl_exception_handler(dp, skb,
> >  								 key, a);
> >  			break;
> > +
> > +		case OVS_ACTION_ATTR_DROP:
> > +			u32 reason = nla_get_u32(a);
> > +
> > +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> > +					SKB_DROP_REASON_SUBSYS_SHIFT;
> > +
> > +			if (reason == OVS_XLATE_OK)
> > +				break;
> > +
> > +			kfree_skb_reason(skb, reason);
> > +			return 0;
> >  		}
> 
> Hi Eric,
> 
> thanks for your patches. This is an interesting new feature.
> 
> unfortunately clang-16 doesn't seem to like this very much.
> I think that it is due to the declaration of reason not
> being enclosed in a block - { }.
> 
>   net/openvswitch/actions.c:1483:4: error: expected expression
>                           u32 reason = nla_get_u32(a);
>                           ^
>   net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason'
>                           reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
>                           ^
>   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
>                           if (reason == OVS_XLATE_OK)
>                               ^
>   net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason'
>                           kfree_skb_reason(skb, reason);
>                                                 ^
>   4 errors generated.
> 
> 
> net-next is currently closed.
> So please provide a v2 once it reposts, after 10th July.

oof. My bad. I'll fix the clang issue and post v2 in a couple weeks.

Thanks.
Eric.
Simon Horman June 30, 2023, 1:25 p.m. UTC | #5
On Fri, Jun 30, 2023 at 08:29:58AM -0400, Eric Garver wrote:
> On Fri, Jun 30, 2023 at 11:47:04AM +0200, Simon Horman wrote:
> > On Thu, Jun 29, 2023 at 04:30:05PM -0400, Eric Garver wrote:
> > > This adds an explicit drop action. This is used by OVS to drop packets
> > > for which it cannot determine what to do. An explicit action in the
> > > kernel allows passing the reason _why_ the packet is being dropped. We
> > > can then use perf tracing to match on the drop reason.
> > > 
> > > e.g. trace all OVS dropped skbs
> > > 
> > >  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
> > >  [..]
> > >  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
> > >   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
> > > 
> > > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
> > > 
> > > Signed-off-by: Eric Garver <eric@garver.life>
> > 
> > ...
> > 
> > > --- a/net/openvswitch/actions.c
> > > +++ b/net/openvswitch/actions.c
> > > @@ -32,6 +32,7 @@
> > >  #include "vport.h"
> > >  #include "flow_netlink.h"
> > >  #include "openvswitch_trace.h"
> > > +#include "drop.h"
> > >  
> > >  struct deferred_action {
> > >  	struct sk_buff *skb;
> > > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> > >  				return dec_ttl_exception_handler(dp, skb,
> > >  								 key, a);
> > >  			break;
> > > +
> > > +		case OVS_ACTION_ATTR_DROP:
> > > +			u32 reason = nla_get_u32(a);
> > > +
> > > +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> > > +					SKB_DROP_REASON_SUBSYS_SHIFT;
> > > +
> > > +			if (reason == OVS_XLATE_OK)
> > > +				break;
> > > +
> > > +			kfree_skb_reason(skb, reason);
> > > +			return 0;
> > >  		}
> > 
> > Hi Eric,
> > 
> > thanks for your patches. This is an interesting new feature.
> > 
> > unfortunately clang-16 doesn't seem to like this very much.
> > I think that it is due to the declaration of reason not
> > being enclosed in a block - { }.
> > 
> >   net/openvswitch/actions.c:1483:4: error: expected expression
> >                           u32 reason = nla_get_u32(a);
> >                           ^
> >   net/openvswitch/actions.c:1485:4: error: use of undeclared identifier 'reason'
> >                           reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> >                           ^
> >   net/openvswitch/actions.c:1488:8: error: use of undeclared identifier 'reason'
> >                           if (reason == OVS_XLATE_OK)
> >                               ^
> >   net/openvswitch/actions.c:1491:26: error: use of undeclared identifier 'reason'
> >                           kfree_skb_reason(skb, reason);
> >                                                 ^
> >   4 errors generated.
> > 
> > 
> > net-next is currently closed.
> > So please provide a v2 once it reposts, after 10th July.
> 
> oof. My bad. I'll fix the clang issue and post v2 in a couple weeks.

Thanks Eric,

much appreciated.
Aaron Conole July 6, 2023, 12:54 p.m. UTC | #6
Eric Garver <eric@garver.life> writes:

> This adds an explicit drop action. This is used by OVS to drop packets
> for which it cannot determine what to do. An explicit action in the
> kernel allows passing the reason _why_ the packet is being dropped. We
> can then use perf tracing to match on the drop reason.
>
> e.g. trace all OVS dropped skbs
>
>  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
>  [..]
>  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
>   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
>
> reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
>
> Signed-off-by: Eric Garver <eric@garver.life>
> ---
>  include/uapi/linux/openvswitch.h                    |  2 ++
>  net/openvswitch/actions.c                           | 13 +++++++++++++
>  net/openvswitch/flow_netlink.c                      | 12 +++++++++++-
>  .../testing/selftests/net/openvswitch/ovs-dpctl.py  |  3 +++
>  4 files changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index e94870e77ee9..a967dbca3574 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -965,6 +965,7 @@ struct check_pkt_len_arg {
>   * start of the packet or at the start of the l3 header depending on the value
>   * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
>   * argument.
> + * @OVS_ACTION_ATTR_DROP: Explicit drop action.
>   *
>   * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
>   * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
> @@ -1002,6 +1003,7 @@ enum ovs_action_attr {
>  	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
>  	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */
>  	OVS_ACTION_ATTR_DEC_TTL,      /* Nested OVS_DEC_TTL_ATTR_*. */
> +	OVS_ACTION_ATTR_DROP,         /* u32 xlate_error. */
>  
>  	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
>  				       * from userspace. */
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index cab1e02b63e0..4ad9a45dc042 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -32,6 +32,7 @@
>  #include "vport.h"
>  #include "flow_netlink.h"
>  #include "openvswitch_trace.h"
> +#include "drop.h"
>  
>  struct deferred_action {
>  	struct sk_buff *skb;
> @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>  				return dec_ttl_exception_handler(dp, skb,
>  								 key, a);
>  			break;
> +
> +		case OVS_ACTION_ATTR_DROP:
> +			u32 reason = nla_get_u32(a);
> +
> +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> +					SKB_DROP_REASON_SUBSYS_SHIFT;
> +
> +			if (reason == OVS_XLATE_OK)
> +				break;
> +
> +			kfree_skb_reason(skb, reason);
> +			return 0;
>  		}
>  
>  		if (unlikely(err)) {
> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> index 41116361433d..23d39eae9a0d 100644
> --- a/net/openvswitch/flow_netlink.c
> +++ b/net/openvswitch/flow_netlink.c
> @@ -39,6 +39,7 @@
>  #include <net/erspan.h>
>  
>  #include "flow_netlink.h"
> +#include "drop.h"
>  
>  struct ovs_len_tbl {
>  	int len;
> @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
>  		case OVS_ACTION_ATTR_RECIRC:
>  		case OVS_ACTION_ATTR_TRUNC:
>  		case OVS_ACTION_ATTR_USERSPACE:
> +		case OVS_ACTION_ATTR_DROP:
>  			break;
>  
>  		case OVS_ACTION_ATTR_CT:
> @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
>  	/* Whenever new actions are added, the need to update this
>  	 * function should be considered.
>  	 */
> -	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
> +	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
>  
>  	if (!actions)
>  		return;
> @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
>  			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
>  			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> +			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
>  		};
>  		const struct ovs_action_push_vlan *vlan;
>  		int type = nla_type(a);
> @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>  			skip_copy = true;
>  			break;
>  
> +		case OVS_ACTION_ATTR_DROP:
> +			if (nla_get_u32(a) >=
> +			    u32_get_bits(OVS_XLATE_MAX,
> +					 ~SKB_DROP_REASON_SUBSYS_MASK))
> +				return -EINVAL;
> +			break;
> +

If there's a case where the userspace sends a drop reason that isn't
known to the kernel, we will reject the flow, and the only "close" drop
will be OVS_XLATE_OK, which would be wrong.  Is there a reason to do
this?  For example, userspace might get new support for some kind of
flows and during that time might have a new xlate drop reason.  Maybe we
can have a reason code that OVS knows will exist, so that if this fails,
it can at least fall back to that?

>  		default:
>  			OVS_NLERR(log, "Unknown Action type %d", type);
>  			return -EINVAL;
> diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> index 1c8b36bc15d4..526ebad7d514 100644
> --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> @@ -115,6 +115,7 @@ class ovsactions(nla):
>          ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
>          ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
>          ("OVS_ACTION_ATTR_DEC_TTL", "none"),
> +        ("OVS_ACTION_ATTR_DROP", "uint32"),
>      )
>  
>      class ctact(nla):
> @@ -261,6 +262,8 @@ class ovsactions(nla):
>                      print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
>                  elif field[0] == "OVS_ACTION_ATTR_TRUNC":
>                      print_str += "trunc(%d)" % int(self.get_attr(field[0]))
> +                elif field[0] == "OVS_ACTION_ATTR_DROP":
> +                    print_str += "drop"

Can we also include the reason here?

>              elif field[1] == "flag":
>                  if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
>                      print_str += "ct_clear"
Eric Garver July 6, 2023, 1:57 p.m. UTC | #7
On Thu, Jul 06, 2023 at 08:54:16AM -0400, Aaron Conole wrote:
> Eric Garver <eric@garver.life> writes:
> 
> > This adds an explicit drop action. This is used by OVS to drop packets
> > for which it cannot determine what to do. An explicit action in the
> > kernel allows passing the reason _why_ the packet is being dropped. We
> > can then use perf tracing to match on the drop reason.
> >
> > e.g. trace all OVS dropped skbs
> >
> >  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
> >  [..]
> >  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
> >   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
> >
> > reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
> >
> > Signed-off-by: Eric Garver <eric@garver.life>
> > ---
> >  include/uapi/linux/openvswitch.h                    |  2 ++
> >  net/openvswitch/actions.c                           | 13 +++++++++++++
> >  net/openvswitch/flow_netlink.c                      | 12 +++++++++++-
> >  .../testing/selftests/net/openvswitch/ovs-dpctl.py  |  3 +++
> >  4 files changed, 29 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > index e94870e77ee9..a967dbca3574 100644
> > --- a/include/uapi/linux/openvswitch.h
> > +++ b/include/uapi/linux/openvswitch.h
> > @@ -965,6 +965,7 @@ struct check_pkt_len_arg {
> >   * start of the packet or at the start of the l3 header depending on the value
> >   * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
> >   * argument.
> > + * @OVS_ACTION_ATTR_DROP: Explicit drop action.
> >   *
> >   * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
> >   * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
> > @@ -1002,6 +1003,7 @@ enum ovs_action_attr {
> >  	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
> >  	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */
> >  	OVS_ACTION_ATTR_DEC_TTL,      /* Nested OVS_DEC_TTL_ATTR_*. */
> > +	OVS_ACTION_ATTR_DROP,         /* u32 xlate_error. */
> >  
> >  	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
> >  				       * from userspace. */
> > diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> > index cab1e02b63e0..4ad9a45dc042 100644
> > --- a/net/openvswitch/actions.c
> > +++ b/net/openvswitch/actions.c
> > @@ -32,6 +32,7 @@
> >  #include "vport.h"
> >  #include "flow_netlink.h"
> >  #include "openvswitch_trace.h"
> > +#include "drop.h"
> >  
> >  struct deferred_action {
> >  	struct sk_buff *skb;
> > @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> >  				return dec_ttl_exception_handler(dp, skb,
> >  								 key, a);
> >  			break;
> > +
> > +		case OVS_ACTION_ATTR_DROP:
> > +			u32 reason = nla_get_u32(a);
> > +
> > +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
> > +					SKB_DROP_REASON_SUBSYS_SHIFT;
> > +
> > +			if (reason == OVS_XLATE_OK)
> > +				break;
> > +
> > +			kfree_skb_reason(skb, reason);
> > +			return 0;
> >  		}
> >  
> >  		if (unlikely(err)) {
> > diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
> > index 41116361433d..23d39eae9a0d 100644
> > --- a/net/openvswitch/flow_netlink.c
> > +++ b/net/openvswitch/flow_netlink.c
> > @@ -39,6 +39,7 @@
> >  #include <net/erspan.h>
> >  
> >  #include "flow_netlink.h"
> > +#include "drop.h"
> >  
> >  struct ovs_len_tbl {
> >  	int len;
> > @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
> >  		case OVS_ACTION_ATTR_RECIRC:
> >  		case OVS_ACTION_ATTR_TRUNC:
> >  		case OVS_ACTION_ATTR_USERSPACE:
> > +		case OVS_ACTION_ATTR_DROP:
> >  			break;
> >  
> >  		case OVS_ACTION_ATTR_CT:
> > @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
> >  	/* Whenever new actions are added, the need to update this
> >  	 * function should be considered.
> >  	 */
> > -	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
> > +	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
> >  
> >  	if (!actions)
> >  		return;
> > @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >  			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
> >  			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
> >  			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
> > +			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
> >  		};
> >  		const struct ovs_action_push_vlan *vlan;
> >  		int type = nla_type(a);
> > @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
> >  			skip_copy = true;
> >  			break;
> >  
> > +		case OVS_ACTION_ATTR_DROP:
> > +			if (nla_get_u32(a) >=
> > +			    u32_get_bits(OVS_XLATE_MAX,
> > +					 ~SKB_DROP_REASON_SUBSYS_MASK))
> > +				return -EINVAL;
> > +			break;
> > +
> 
> If there's a case where the userspace sends a drop reason that isn't
> known to the kernel, we will reject the flow, and the only "close" drop
> will be OVS_XLATE_OK, which would be wrong.  Is there a reason to do
> this?  For example, userspace might get new support for some kind of
> flows and during that time might have a new xlate drop reason.  Maybe we
> can have a reason code that OVS knows will exist, so that if this fails,
> it can at least fall back to that?

You're correct. It will reject the flow.

Maybe we clamp the value to OVS_XLATE_MAX if it's unknown. That makes
the skb drop reason less helpful, but no less helpful than today ;). At
least we won't reject the flow.

We could alias OVS_XLATE_MAX to OVS_XLATE_UNKNOWN. I prefer an explicit
value for OVS_XLATE_UNKNOWN, e.g. (u16)-1.

> >  		default:
> >  			OVS_NLERR(log, "Unknown Action type %d", type);
> >  			return -EINVAL;
> > diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> > index 1c8b36bc15d4..526ebad7d514 100644
> > --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> > +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
> > @@ -115,6 +115,7 @@ class ovsactions(nla):
> >          ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
> >          ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
> >          ("OVS_ACTION_ATTR_DEC_TTL", "none"),
> > +        ("OVS_ACTION_ATTR_DROP", "uint32"),
> >      )
> >  
> >      class ctact(nla):
> > @@ -261,6 +262,8 @@ class ovsactions(nla):
> >                      print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
> >                  elif field[0] == "OVS_ACTION_ATTR_TRUNC":
> >                      print_str += "trunc(%d)" % int(self.get_attr(field[0]))
> > +                elif field[0] == "OVS_ACTION_ATTR_DROP":
> > +                    print_str += "drop"
> 
> Can we also include the reason here?

It could. This mimics dpctl output, which does currently not include the
reason. So I went with parity.

I have proposed adding the reason to all dpctl "drop" output, but was
planning on that being a follow up to this work.

> >              elif field[1] == "flag":
> >                  if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
> >                      print_str += "ct_clear"
> 
>
Ilya Maximets July 7, 2023, 10:30 a.m. UTC | #8
On 7/6/23 15:57, Eric Garver wrote:
> On Thu, Jul 06, 2023 at 08:54:16AM -0400, Aaron Conole wrote:
>> Eric Garver <eric@garver.life> writes:
>>
>>> This adds an explicit drop action. This is used by OVS to drop packets
>>> for which it cannot determine what to do. An explicit action in the
>>> kernel allows passing the reason _why_ the packet is being dropped. We
>>> can then use perf tracing to match on the drop reason.
>>>
>>> e.g. trace all OVS dropped skbs
>>>
>>>  # perf trace -e skb:kfree_skb --filter="reason >= 0x30000"
>>>  [..]
>>>  106.023 ping/2465 skb:kfree_skb(skbaddr: 0xffffa0e8765f2000, \
>>>   location:0xffffffffc0d9b462, protocol: 2048, reason: 196610)
>>>
>>> reason: 196610 --> 0x30002 (OVS_XLATE_RECURSION_TOO_DEEP)
>>>
>>> Signed-off-by: Eric Garver <eric@garver.life>
>>> ---
>>>  include/uapi/linux/openvswitch.h                    |  2 ++
>>>  net/openvswitch/actions.c                           | 13 +++++++++++++
>>>  net/openvswitch/flow_netlink.c                      | 12 +++++++++++-
>>>  .../testing/selftests/net/openvswitch/ovs-dpctl.py  |  3 +++
>>>  4 files changed, 29 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>>> index e94870e77ee9..a967dbca3574 100644
>>> --- a/include/uapi/linux/openvswitch.h
>>> +++ b/include/uapi/linux/openvswitch.h
>>> @@ -965,6 +965,7 @@ struct check_pkt_len_arg {
>>>   * start of the packet or at the start of the l3 header depending on the value
>>>   * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
>>>   * argument.
>>> + * @OVS_ACTION_ATTR_DROP: Explicit drop action.
>>>   *
>>>   * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
>>>   * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
>>> @@ -1002,6 +1003,7 @@ enum ovs_action_attr {
>>>  	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
>>>  	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */
>>>  	OVS_ACTION_ATTR_DEC_TTL,      /* Nested OVS_DEC_TTL_ATTR_*. */
>>> +	OVS_ACTION_ATTR_DROP,         /* u32 xlate_error. */
>>>  
>>>  	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
>>>  				       * from userspace. */
>>> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
>>> index cab1e02b63e0..4ad9a45dc042 100644
>>> --- a/net/openvswitch/actions.c
>>> +++ b/net/openvswitch/actions.c
>>> @@ -32,6 +32,7 @@
>>>  #include "vport.h"
>>>  #include "flow_netlink.h"
>>>  #include "openvswitch_trace.h"
>>> +#include "drop.h"
>>>  
>>>  struct deferred_action {
>>>  	struct sk_buff *skb;
>>> @@ -1477,6 +1478,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
>>>  				return dec_ttl_exception_handler(dp, skb,
>>>  								 key, a);
>>>  			break;
>>> +
>>> +		case OVS_ACTION_ATTR_DROP:
>>> +			u32 reason = nla_get_u32(a);
>>> +
>>> +			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
>>> +					SKB_DROP_REASON_SUBSYS_SHIFT;
>>> +
>>> +			if (reason == OVS_XLATE_OK)
>>> +				break;
>>> +
>>> +			kfree_skb_reason(skb, reason);
>>> +			return 0;
>>>  		}
>>>  
>>>  		if (unlikely(err)) {
>>> diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
>>> index 41116361433d..23d39eae9a0d 100644
>>> --- a/net/openvswitch/flow_netlink.c
>>> +++ b/net/openvswitch/flow_netlink.c
>>> @@ -39,6 +39,7 @@
>>>  #include <net/erspan.h>
>>>  
>>>  #include "flow_netlink.h"
>>> +#include "drop.h"
>>>  
>>>  struct ovs_len_tbl {
>>>  	int len;
>>> @@ -61,6 +62,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
>>>  		case OVS_ACTION_ATTR_RECIRC:
>>>  		case OVS_ACTION_ATTR_TRUNC:
>>>  		case OVS_ACTION_ATTR_USERSPACE:
>>> +		case OVS_ACTION_ATTR_DROP:
>>>  			break;
>>>  
>>>  		case OVS_ACTION_ATTR_CT:
>>> @@ -2394,7 +2396,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
>>>  	/* Whenever new actions are added, the need to update this
>>>  	 * function should be considered.
>>>  	 */
>>> -	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
>>> +	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
>>>  
>>>  	if (!actions)
>>>  		return;
>>> @@ -3182,6 +3184,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>  			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
>>>  			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
>>>  			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
>>> +			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
>>>  		};
>>>  		const struct ovs_action_push_vlan *vlan;
>>>  		int type = nla_type(a);
>>> @@ -3453,6 +3456,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
>>>  			skip_copy = true;
>>>  			break;
>>>  
>>> +		case OVS_ACTION_ATTR_DROP:
>>> +			if (nla_get_u32(a) >=
>>> +			    u32_get_bits(OVS_XLATE_MAX,
>>> +					 ~SKB_DROP_REASON_SUBSYS_MASK))
>>> +				return -EINVAL;
>>> +			break;
>>> +
>>
>> If there's a case where the userspace sends a drop reason that isn't
>> known to the kernel, we will reject the flow, and the only "close" drop
>> will be OVS_XLATE_OK, which would be wrong.  Is there a reason to do
>> this?  For example, userspace might get new support for some kind of
>> flows and during that time might have a new xlate drop reason.  Maybe we
>> can have a reason code that OVS knows will exist, so that if this fails,
>> it can at least fall back to that?
> 
> You're correct. It will reject the flow.
> 
> Maybe we clamp the value to OVS_XLATE_MAX if it's unknown. That makes
> the skb drop reason less helpful, but no less helpful than today ;). At
> least we won't reject the flow.
> 
> We could alias OVS_XLATE_MAX to OVS_XLATE_UNKNOWN. I prefer an explicit
> value for OVS_XLATE_UNKNOWN, e.g. (u16)-1.

A wild idea:  How about we do not define actual reasons?  i.e. define a
subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where
'value' is whatever userspace gives as long as it is within a subsystem
range?

The point is: drop reasons are not part of the uAPI, but by defining drop
reasons for openvswitch we're making this subset of drop reasons part of
the uAPI.  And that seems a bit shady.  Users can't really rely on
actual values of drop reasons anyway, because the subsystem offset will
not be part of the uAPI.  And it doesn't matter if they need to get them
from the kernel binary or from the userspace OVS binary.

So, it might be cleaner to not define them in the first place.  Thoughts?

CC: kernel maintainers

Best regards, Ilya Maximets.
Jakub Kicinski July 7, 2023, 3 p.m. UTC | #9
On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote:
> A wild idea:  How about we do not define actual reasons?  i.e. define a
> subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where
> 'value' is whatever userspace gives as long as it is within a subsystem
> range?

That already exists, right? Johannes added it in the last release for WiFi.
Ilya Maximets July 7, 2023, 3:29 p.m. UTC | #10
On 7/7/23 17:00, Jakub Kicinski wrote:
> On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote:
>> A wild idea:  How about we do not define actual reasons?  i.e. define a
>> subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where
>> 'value' is whatever userspace gives as long as it is within a subsystem
>> range?
> 
> That already exists, right? Johannes added it in the last release for WiFi.

I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
to that on a surface.  However, looking closer, any value that can be passed
into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
kind of defined in net/mac80211/drop.h, unless I'm missing something (very
possible, because I don't really know wifi code).

The difference, I guess, is that for openvswitch values will be provided by
the userpsace application via netlink interface.  It'll be just a number not
defined anywhere in the kernel.  Only the subsystem itself will be defined
in order to occupy the range.  Garbage in, same garbage out, from the kernel's
perspective.

Best regards, Ilya Maximets.
Ilya Maximets July 7, 2023, 4:04 p.m. UTC | #11
On 7/7/23 17:29, Ilya Maximets wrote:
> On 7/7/23 17:00, Jakub Kicinski wrote:
>> On Fri, 7 Jul 2023 12:30:38 +0200 Ilya Maximets wrote:
>>> A wild idea:  How about we do not define actual reasons?  i.e. define a
>>> subsystem and just call kfree_skb_reason(skb, SUBSYSTEM | value), where
>>> 'value' is whatever userspace gives as long as it is within a subsystem
>>> range?
>>
>> That already exists, right? Johannes added it in the last release for WiFi.
> 
> I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
> to that on a surface.  However, looking closer, any value that can be passed
> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
> kind of defined in net/mac80211/drop.h, unless I'm missing something (very
> possible, because I don't really know wifi code).
> 
> The difference, I guess, is that for openvswitch values will be provided by
> the userpsace application via netlink interface.  It'll be just a number not
> defined anywhere in the kernel.  Only the subsystem itself will be defined
> in order to occupy the range.  Garbage in, same garbage out, from the kernel's
> perspective.

To be clear, I think, not defining them in this particular case is better.
Definition of every reason that userspace can come up with will add extra
uAPI maintenance cost/issues with no practical benefits.  Values are not
going to be used for anything outside reporting a drop reason and subsystem
offset is not part of uAPI anyway.

> 
> Best regards, Ilya Maximets.
Jakub Kicinski July 7, 2023, 10:06 p.m. UTC | #12
On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote:
> >> That already exists, right? Johannes added it in the last release for WiFi.  
> > 
> > I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
> > to that on a surface.  However, looking closer, any value that can be passed
> > into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
> > kind of defined in net/mac80211/drop.h, unless I'm missing something (very
> > possible, because I don't really know wifi code).
> > 
> > The difference, I guess, is that for openvswitch values will be provided by
> > the userpsace application via netlink interface.  It'll be just a number not
> > defined anywhere in the kernel.  Only the subsystem itself will be defined
> > in order to occupy the range.  Garbage in, same garbage out, from the kernel's
> > perspective.  
> 
> To be clear, I think, not defining them in this particular case is better.
> Definition of every reason that userspace can come up with will add extra
> uAPI maintenance cost/issues with no practical benefits.  Values are not
> going to be used for anything outside reporting a drop reason and subsystem
> offset is not part of uAPI anyway.

Ah, I see. No, please don't stuff user space defined values into 
the drop reason. The reasons are for debugging the kernel stack 
itself. IOW it'd be abuse not reuse.
Ilya Maximets July 10, 2023, 4:51 p.m. UTC | #13
On 7/8/23 00:06, Jakub Kicinski wrote:
> On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote:
>>>> That already exists, right? Johannes added it in the last release for WiFi.  
>>>
>>> I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
>>> to that on a surface.  However, looking closer, any value that can be passed
>>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
>>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very
>>> possible, because I don't really know wifi code).
>>>
>>> The difference, I guess, is that for openvswitch values will be provided by
>>> the userpsace application via netlink interface.  It'll be just a number not
>>> defined anywhere in the kernel.  Only the subsystem itself will be defined
>>> in order to occupy the range.  Garbage in, same garbage out, from the kernel's
>>> perspective.  
>>
>> To be clear, I think, not defining them in this particular case is better.
>> Definition of every reason that userspace can come up with will add extra
>> uAPI maintenance cost/issues with no practical benefits.  Values are not
>> going to be used for anything outside reporting a drop reason and subsystem
>> offset is not part of uAPI anyway.
> 
> Ah, I see. No, please don't stuff user space defined values into 
> the drop reason. The reasons are for debugging the kernel stack 
> itself. IOW it'd be abuse not reuse.

Makes sense.  I wasn't sure that's a good solution from a kernel perspective
either.  It's better than defining all these reasons, IMO, but it's not good
enough to be considered acceptable, I agree.

How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
One for an explicit drop action with a zero argument and one for an explicit
drop with non-zero argument.

The exact reason for the error can be retrieved by other means, i.e by looking
at the datapath flow dump or OVS logs/traces.

This way we can give a user who is catching packet drop traces a signal that
there was something wrong with an OVS flow and they can look up exact details
from the userspace / flow dump.

The point being, most of the flows will have a zero as a drop action argument,
i.e. a regular explicit packet drop.  It will be hard to figure out which flow
exactly we're hitting without looking at the full flow dump.  And if the value
is non-zero, then it should be immediately obvious which flow is to blame from
the dump, as we should not have a lot of such flows.

This would still allow us to avoid a maintenance burden of defining every case,
which are fairly meaningless for the kernel itself, while having 99% of the
information we may need.

Jakub, do you think this will be acceptable?

Eric, Adrian, Aaron, do you see any problems with such implementation?

P.S. There is a plan to add more drop reasons for other places in openvswitch
     module to catch more regular types of drops like memory issues or upcall
     failures.  So, the drop reason subsystem can be extended later.
     The explicit drop action is a bit of an odd case here.

Best regards, Ilya Maximets.
Jakub Kicinski July 10, 2023, 5:01 p.m. UTC | #14
On Mon, 10 Jul 2023 18:51:19 +0200 Ilya Maximets wrote:
> Makes sense.  I wasn't sure that's a good solution from a kernel perspective
> either.  It's better than defining all these reasons, IMO, but it's not good
> enough to be considered acceptable, I agree.
> 
> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
> One for an explicit drop action with a zero argument and one for an explicit
> drop with non-zero argument.
> 
> The exact reason for the error can be retrieved by other means, i.e by looking
> at the datapath flow dump or OVS logs/traces.
> 
> This way we can give a user who is catching packet drop traces a signal that
> there was something wrong with an OVS flow and they can look up exact details
> from the userspace / flow dump.
> 
> The point being, most of the flows will have a zero as a drop action argument,
> i.e. a regular explicit packet drop.  It will be hard to figure out which flow
> exactly we're hitting without looking at the full flow dump.  And if the value
> is non-zero, then it should be immediately obvious which flow is to blame from
> the dump, as we should not have a lot of such flows.
> 
> This would still allow us to avoid a maintenance burden of defining every case,
> which are fairly meaningless for the kernel itself, while having 99% of the
> information we may need.
> 
> Jakub, do you think this will be acceptable?

As far as I understand what you're proposing, yes :)

> Eric, Adrian, Aaron, do you see any problems with such implementation?
> 
> P.S. There is a plan to add more drop reasons for other places in openvswitch
>      module to catch more regular types of drops like memory issues or upcall
>      failures.  So, the drop reason subsystem can be extended later.
>      The explicit drop action is a bit of an odd case here.

If you have more than ~4 OvS specific reasons, I wonder if it still
makes sense to create a reason group/subsystem for OvS (a'la WiFi)?
Eric Garver July 10, 2023, 6:21 p.m. UTC | #15
On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote:
> On 7/8/23 00:06, Jakub Kicinski wrote:
> > On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote:
> >>>> That already exists, right? Johannes added it in the last release for WiFi.  
> >>>
> >>> I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
> >>> to that on a surface.  However, looking closer, any value that can be passed
> >>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
> >>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very
> >>> possible, because I don't really know wifi code).
> >>>
> >>> The difference, I guess, is that for openvswitch values will be provided by
> >>> the userpsace application via netlink interface.  It'll be just a number not
> >>> defined anywhere in the kernel.  Only the subsystem itself will be defined
> >>> in order to occupy the range.  Garbage in, same garbage out, from the kernel's
> >>> perspective.  
> >>
> >> To be clear, I think, not defining them in this particular case is better.
> >> Definition of every reason that userspace can come up with will add extra
> >> uAPI maintenance cost/issues with no practical benefits.  Values are not
> >> going to be used for anything outside reporting a drop reason and subsystem
> >> offset is not part of uAPI anyway.
> > 
> > Ah, I see. No, please don't stuff user space defined values into 
> > the drop reason. The reasons are for debugging the kernel stack 
> > itself. IOW it'd be abuse not reuse.
> 
> Makes sense.  I wasn't sure that's a good solution from a kernel perspective
> either.  It's better than defining all these reasons, IMO, but it's not good
> enough to be considered acceptable, I agree.
> 
> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
> One for an explicit drop action with a zero argument and one for an explicit
> drop with non-zero argument.
> 
> The exact reason for the error can be retrieved by other means, i.e by looking
> at the datapath flow dump or OVS logs/traces.
> 
> This way we can give a user who is catching packet drop traces a signal that
> there was something wrong with an OVS flow and they can look up exact details
> from the userspace / flow dump.
> 
> The point being, most of the flows will have a zero as a drop action argument,
> i.e. a regular explicit packet drop.  It will be hard to figure out which flow
> exactly we're hitting without looking at the full flow dump.  And if the value
> is non-zero, then it should be immediately obvious which flow is to blame from
> the dump, as we should not have a lot of such flows.
> 
> This would still allow us to avoid a maintenance burden of defining every case,
> which are fairly meaningless for the kernel itself, while having 99% of the
> information we may need.
> 
> Jakub, do you think this will be acceptable?
> 
> Eric, Adrian, Aaron, do you see any problems with such implementation?

I see no problems. I'm content with this approach.

> P.S. There is a plan to add more drop reasons for other places in openvswitch
>      module to catch more regular types of drops like memory issues or upcall
>      failures.  So, the drop reason subsystem can be extended later.
>      The explicit drop action is a bit of an odd case here.
> 
> Best regards, Ilya Maximets.
>
Ilya Maximets July 10, 2023, 6:39 p.m. UTC | #16
On 7/10/23 19:01, Jakub Kicinski wrote:
> On Mon, 10 Jul 2023 18:51:19 +0200 Ilya Maximets wrote:
>> Makes sense.  I wasn't sure that's a good solution from a kernel perspective
>> either.  It's better than defining all these reasons, IMO, but it's not good
>> enough to be considered acceptable, I agree.
>>
>> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
>> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
>> One for an explicit drop action with a zero argument and one for an explicit
>> drop with non-zero argument.
>>
>> The exact reason for the error can be retrieved by other means, i.e by looking
>> at the datapath flow dump or OVS logs/traces.
>>
>> This way we can give a user who is catching packet drop traces a signal that
>> there was something wrong with an OVS flow and they can look up exact details
>> from the userspace / flow dump.
>>
>> The point being, most of the flows will have a zero as a drop action argument,
>> i.e. a regular explicit packet drop.  It will be hard to figure out which flow
>> exactly we're hitting without looking at the full flow dump.  And if the value
>> is non-zero, then it should be immediately obvious which flow is to blame from
>> the dump, as we should not have a lot of such flows.
>>
>> This would still allow us to avoid a maintenance burden of defining every case,
>> which are fairly meaningless for the kernel itself, while having 99% of the
>> information we may need.
>>
>> Jakub, do you think this will be acceptable?
> 
> As far as I understand what you're proposing, yes :)

OK.  Just to spell it all out:

Userspace will install a flow with an OVS_FLOW_CMD_NEW:

  match:ip,tcp,... actions:something,something,drop(0)
  match:ip,udp,... actions:something,something,drop(42)

drop() here represents the OVS_ACTION_ATTR_DROP.

Then, in net/openvswitch/actions.c:do_execute_actions(), while executing
these actions:

  case OVS_ACTION_ATTR_DROP:
      kfree_skb_reason(skb, nla_get_u32(a) ? OVS_DROP_ACTION_WITH_ERROR
                                           : OVS_DROP_ACTION);

Users can enable traces and catch the OVS_DROP_ACTION_WITH_ERROR.
Later they can dump flows with OVS_FLOW_CMD_GET and see that the
error value was 42.

> 
>> Eric, Adrian, Aaron, do you see any problems with such implementation?
>>
>> P.S. There is a plan to add more drop reasons for other places in openvswitch
>>      module to catch more regular types of drops like memory issues or upcall
>>      failures.  So, the drop reason subsystem can be extended later.
>>      The explicit drop action is a bit of an odd case here.
> 
> If you have more than ~4 OvS specific reasons, I wonder if it still
> makes sense to create a reason group/subsystem for OvS (a'la WiFi)?

I believe, we will easily have more than 4 OVS-specific reasons.  A few
from the top of my head:
  - upcall failure (failed to send a packet to userspace)
  - reached the limit for deferred actions
  - reached the recursion limit

So, creation of a reason group/subsystem seems reasonable to me.

Best regards, Ilya Maximets.
Jakub Kicinski July 10, 2023, 7:02 p.m. UTC | #17
On Mon, 10 Jul 2023 20:39:11 +0200 Ilya Maximets wrote:
> > As far as I understand what you're proposing, yes :)  
> 
> OK.  Just to spell it all out:
> 
> Userspace will install a flow with an OVS_FLOW_CMD_NEW:
> 
>   match:ip,tcp,... actions:something,something,drop(0)
>   match:ip,udp,... actions:something,something,drop(42)
> 
> drop() here represents the OVS_ACTION_ATTR_DROP.
> 
> Then, in net/openvswitch/actions.c:do_execute_actions(), while executing
> these actions:
> 
>   case OVS_ACTION_ATTR_DROP:
>       kfree_skb_reason(skb, nla_get_u32(a) ? OVS_DROP_ACTION_WITH_ERROR
>                                            : OVS_DROP_ACTION);
> 
> Users can enable traces and catch the OVS_DROP_ACTION_WITH_ERROR.
> Later they can dump flows with OVS_FLOW_CMD_GET and see that the
> error value was 42.

nod

> >> Eric, Adrian, Aaron, do you see any problems with such implementation?
> >>
> >> P.S. There is a plan to add more drop reasons for other places in openvswitch
> >>      module to catch more regular types of drops like memory issues or upcall
> >>      failures.  So, the drop reason subsystem can be extended later.
> >>      The explicit drop action is a bit of an odd case here.  
> > 
> > If you have more than ~4 OvS specific reasons, I wonder if it still
> > makes sense to create a reason group/subsystem for OvS (a'la WiFi)?  
> 
> I believe, we will easily have more than 4 OVS-specific reasons.  A few
> from the top of my head:
>   - upcall failure (failed to send a packet to userspace)
>   - reached the limit for deferred actions
>   - reached the recursion limit
> 
> So, creation of a reason group/subsystem seems reasonable to me.

SG.
Aaron Conole July 11, 2023, 8:46 p.m. UTC | #18
Eric Garver <eric@garver.life> writes:

> On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote:
>> On 7/8/23 00:06, Jakub Kicinski wrote:
>> > On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote:
>> >>>> That already exists, right? Johannes added it in the last release for WiFi.  
>> >>>
>> >>> I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
>> >>> to that on a surface.  However, looking closer, any value that can be passed
>> >>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
>> >>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very
>> >>> possible, because I don't really know wifi code).
>> >>>
>> >>> The difference, I guess, is that for openvswitch values will be provided by
>> >>> the userpsace application via netlink interface.  It'll be just a number not
>> >>> defined anywhere in the kernel.  Only the subsystem itself will be defined
>> >>> in order to occupy the range.  Garbage in, same garbage out, from the kernel's
>> >>> perspective.  
>> >>
>> >> To be clear, I think, not defining them in this particular case is better.
>> >> Definition of every reason that userspace can come up with will add extra
>> >> uAPI maintenance cost/issues with no practical benefits.  Values are not
>> >> going to be used for anything outside reporting a drop reason and subsystem
>> >> offset is not part of uAPI anyway.
>> > 
>> > Ah, I see. No, please don't stuff user space defined values into 
>> > the drop reason. The reasons are for debugging the kernel stack 
>> > itself. IOW it'd be abuse not reuse.
>> 
>> Makes sense.  I wasn't sure that's a good solution from a kernel perspective
>> either.  It's better than defining all these reasons, IMO, but it's not good
>> enough to be considered acceptable, I agree.
>> 
>> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
>> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
>> One for an explicit drop action with a zero argument and one for an explicit
>> drop with non-zero argument.
>> 
>> The exact reason for the error can be retrieved by other means, i.e by looking
>> at the datapath flow dump or OVS logs/traces.
>> 
>> This way we can give a user who is catching packet drop traces a signal that
>> there was something wrong with an OVS flow and they can look up exact details
>> from the userspace / flow dump.
>> 
>> The point being, most of the flows will have a zero as a drop action argument,
>> i.e. a regular explicit packet drop.  It will be hard to figure out which flow
>> exactly we're hitting without looking at the full flow dump.  And if the value
>> is non-zero, then it should be immediately obvious which flow is to blame from
>> the dump, as we should not have a lot of such flows.
>> 
>> This would still allow us to avoid a maintenance burden of defining every case,
>> which are fairly meaningless for the kernel itself, while having 99% of the
>> information we may need.
>> 
>> Jakub, do you think this will be acceptable?
>> 
>> Eric, Adrian, Aaron, do you see any problems with such implementation?
>
> I see no problems. I'm content with this approach.

+1

>> P.S. There is a plan to add more drop reasons for other places in openvswitch
>>      module to catch more regular types of drops like memory issues or upcall
>>      failures.  So, the drop reason subsystem can be extended later.
>>      The explicit drop action is a bit of an odd case here.
>> 
>> Best regards, Ilya Maximets.
>>
Adrian Moreno July 12, 2023, 7:53 a.m. UTC | #19
On 7/11/23 22:46, Aaron Conole wrote:
> Eric Garver <eric@garver.life> writes:
> 
>> On Mon, Jul 10, 2023 at 06:51:19PM +0200, Ilya Maximets wrote:
>>> On 7/8/23 00:06, Jakub Kicinski wrote:
>>>> On Fri, 7 Jul 2023 18:04:36 +0200 Ilya Maximets wrote:
>>>>>>> That already exists, right? Johannes added it in the last release for WiFi.
>>>>>>
>>>>>> I'm not sure.  The SKB_DROP_REASON_SUBSYS_MAC80211_UNUSABLE behaves similarly
>>>>>> to that on a surface.  However, looking closer, any value that can be passed
>>>>>> into ieee80211_rx_handlers_result() and ends up in the kfree_skb_reason() is
>>>>>> kind of defined in net/mac80211/drop.h, unless I'm missing something (very
>>>>>> possible, because I don't really know wifi code).
>>>>>>
>>>>>> The difference, I guess, is that for openvswitch values will be provided by
>>>>>> the userpsace application via netlink interface.  It'll be just a number not
>>>>>> defined anywhere in the kernel.  Only the subsystem itself will be defined
>>>>>> in order to occupy the range.  Garbage in, same garbage out, from the kernel's
>>>>>> perspective.
>>>>>
>>>>> To be clear, I think, not defining them in this particular case is better.
>>>>> Definition of every reason that userspace can come up with will add extra
>>>>> uAPI maintenance cost/issues with no practical benefits.  Values are not
>>>>> going to be used for anything outside reporting a drop reason and subsystem
>>>>> offset is not part of uAPI anyway.
>>>>
>>>> Ah, I see. No, please don't stuff user space defined values into
>>>> the drop reason. The reasons are for debugging the kernel stack
>>>> itself. IOW it'd be abuse not reuse.
>>>
>>> Makes sense.  I wasn't sure that's a good solution from a kernel perspective
>>> either.  It's better than defining all these reasons, IMO, but it's not good
>>> enough to be considered acceptable, I agree.
>>>
>>> How about we define just 2 reasons, e.g. OVS_DROP_REASON_EXPLICIT_ACTION and
>>> OVS_DROP_REASON_EXPLICIT_ACTION_WITH_ERROR (exact names can be different) ?
>>> One for an explicit drop action with a zero argument and one for an explicit
>>> drop with non-zero argument.
>>>
>>> The exact reason for the error can be retrieved by other means, i.e by looking
>>> at the datapath flow dump or OVS logs/traces.
>>>
>>> This way we can give a user who is catching packet drop traces a signal that
>>> there was something wrong with an OVS flow and they can look up exact details
>>> from the userspace / flow dump.
>>>
>>> The point being, most of the flows will have a zero as a drop action argument,
>>> i.e. a regular explicit packet drop.  It will be hard to figure out which flow
>>> exactly we're hitting without looking at the full flow dump.  And if the value
>>> is non-zero, then it should be immediately obvious which flow is to blame from
>>> the dump, as we should not have a lot of such flows.
>>>
>>> This would still allow us to avoid a maintenance burden of defining every case,
>>> which are fairly meaningless for the kernel itself, while having 99% of the
>>> information we may need.
>>>
>>> Jakub, do you think this will be acceptable?
>>>
>>> Eric, Adrian, Aaron, do you see any problems with such implementation?
>>
>> I see no problems. I'm content with this approach.
> 
> +1

Sounds like a good plan. Thanks.

> 
>>> P.S. There is a plan to add more drop reasons for other places in openvswitch
>>>       module to catch more regular types of drops like memory issues or upcall
>>>       failures.  So, the drop reason subsystem can be extended later.
>>>       The explicit drop action is a bit of an odd case here.
>>>
>>> Best regards, Ilya Maximets.
>>>
>
diff mbox series

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index e94870e77ee9..a967dbca3574 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -965,6 +965,7 @@  struct check_pkt_len_arg {
  * start of the packet or at the start of the l3 header depending on the value
  * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
  * argument.
+ * @OVS_ACTION_ATTR_DROP: Explicit drop action.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -1002,6 +1003,7 @@  enum ovs_action_attr {
 	OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
 	OVS_ACTION_ATTR_ADD_MPLS,     /* struct ovs_action_add_mpls. */
 	OVS_ACTION_ATTR_DEC_TTL,      /* Nested OVS_DEC_TTL_ATTR_*. */
+	OVS_ACTION_ATTR_DROP,         /* u32 xlate_error. */
 
 	__OVS_ACTION_ATTR_MAX,	      /* Nothing past this will be accepted
 				       * from userspace. */
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index cab1e02b63e0..4ad9a45dc042 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -32,6 +32,7 @@ 
 #include "vport.h"
 #include "flow_netlink.h"
 #include "openvswitch_trace.h"
+#include "drop.h"
 
 struct deferred_action {
 	struct sk_buff *skb;
@@ -1477,6 +1478,18 @@  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 				return dec_ttl_exception_handler(dp, skb,
 								 key, a);
 			break;
+
+		case OVS_ACTION_ATTR_DROP:
+			u32 reason = nla_get_u32(a);
+
+			reason |= SKB_DROP_REASON_SUBSYS_OPENVSWITCH <<
+					SKB_DROP_REASON_SUBSYS_SHIFT;
+
+			if (reason == OVS_XLATE_OK)
+				break;
+
+			kfree_skb_reason(skb, reason);
+			return 0;
 		}
 
 		if (unlikely(err)) {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 41116361433d..23d39eae9a0d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -39,6 +39,7 @@ 
 #include <net/erspan.h>
 
 #include "flow_netlink.h"
+#include "drop.h"
 
 struct ovs_len_tbl {
 	int len;
@@ -61,6 +62,7 @@  static bool actions_may_change_flow(const struct nlattr *actions)
 		case OVS_ACTION_ATTR_RECIRC:
 		case OVS_ACTION_ATTR_TRUNC:
 		case OVS_ACTION_ATTR_USERSPACE:
+		case OVS_ACTION_ATTR_DROP:
 			break;
 
 		case OVS_ACTION_ATTR_CT:
@@ -2394,7 +2396,7 @@  static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
 	/* Whenever new actions are added, the need to update this
 	 * function should be considered.
 	 */
-	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 23);
+	BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24);
 
 	if (!actions)
 		return;
@@ -3182,6 +3184,7 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
 			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
 			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
+			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
 		};
 		const struct ovs_action_push_vlan *vlan;
 		int type = nla_type(a);
@@ -3453,6 +3456,13 @@  static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			skip_copy = true;
 			break;
 
+		case OVS_ACTION_ATTR_DROP:
+			if (nla_get_u32(a) >=
+			    u32_get_bits(OVS_XLATE_MAX,
+					 ~SKB_DROP_REASON_SUBSYS_MASK))
+				return -EINVAL;
+			break;
+
 		default:
 			OVS_NLERR(log, "Unknown Action type %d", type);
 			return -EINVAL;
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 1c8b36bc15d4..526ebad7d514 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -115,6 +115,7 @@  class ovsactions(nla):
         ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
         ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
         ("OVS_ACTION_ATTR_DEC_TTL", "none"),
+        ("OVS_ACTION_ATTR_DROP", "uint32"),
     )
 
     class ctact(nla):
@@ -261,6 +262,8 @@  class ovsactions(nla):
                     print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
                 elif field[0] == "OVS_ACTION_ATTR_TRUNC":
                     print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+                elif field[0] == "OVS_ACTION_ATTR_DROP":
+                    print_str += "drop"
             elif field[1] == "flag":
                 if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
                     print_str += "ct_clear"